In [20]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Browser-like request headers sent with every HTTP request made by the scraper.
# The long values are assembled from their parts so each token is easy to read
# and edit; the resulting strings are the plain space/comma-separated headers.
headers = {
    "User-Agent": " ".join([
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/116.0.0.0 Safari/537.36",
    ]),
    "Accept": ",".join([
        "text/html",
        "application/xhtml+xml",
        "application/xml;q=0.9",
        "image/avif",
        "image/webp",
        "*/*;q=0.8",
    ]),
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
}

# --- Scrape configuration ---------------------------------------------------
# Index pages to scrape: Lahore-1-<page>.html for FIRST_PAGE..LAST_PAGE inclusive.
FIRST_PAGE = 401
LAST_PAGE = 420
BASE_URL = 'https://www.zameen.com'
# Seconds to wait on a request before giving up. requests applies no timeout
# by default, so a single stalled connection would otherwise hang the whole run.
REQUEST_TIMEOUT = 30
# Placeholder stored when a field cannot be found on a listing page.
NOT_AVAILABLE = "N/A"

# Output column -> (tag name, CSS class, aria-label or None) used to locate the
# value on a listing detail page. Dict order defines the column order of `df`.
DETAIL_FIELDS = {
    'price': ('span', '_63ea997b', None),
    'title': ('h1', 'aea614fd', None),
    'location': ('span', '_2fdf7fc5', 'Location'),
    'type': ('span', '_2fdf7fc5', 'Type'),
    'area': ('span', '_2fdf7fc5', 'Area'),
    'purpose': ('span', '_2fdf7fc5', 'Purpose'),
    'baths': ('span', '_2fdf7fc5', 'Baths'),
    'beds': ('span', '_2fdf7fc5', 'Beds'),
    'description': ('span', '_3547dac9', None),
}


def extract_field(soup_detail, tag, css_class, aria_label=None):
    """Return the stripped text of the first matching element, or "N/A".

    Parameters
    ----------
    soup_detail : BeautifulSoup
        Parsed listing detail page.
    tag : str
        Tag name to search for (e.g. 'span').
    css_class : str
        CSS class the element must carry.
    aria_label : str or None
        When given, the element must also have this ``aria-label`` value.

    Returns
    -------
    str
        The element's text, or "N/A" when the element is absent or the lookup
        fails. Extraction is best-effort: one bad field must not discard the
        rest of the listing.
    """
    attrs = {'aria-label': aria_label} if aria_label is not None else {}
    try:
        element = soup_detail.find(tag, class_=css_class, attrs=attrs)
        return element.get_text(strip=True) if element is not None else NOT_AVAILABLE
    except Exception:
        # `Exception` rather than a bare `except`, so that Ctrl-C
        # (KeyboardInterrupt) still stops a long-running scrape.
        return NOT_AVAILABLE


def scrape_listing(listing_url):
    """Download one listing detail page and return its fields as a dict.

    The result has the key 'url' followed by every key of DETAIL_FIELDS.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an HTTP error status.
    """
    web = requests.get(listing_url, headers=headers, timeout=REQUEST_TIMEOUT)
    web.raise_for_status()
    soup_detail = BeautifulSoup(web.content, 'html.parser')

    property_data = {'url': listing_url}
    for field, (tag, css_class, aria_label) in DETAIL_FIELDS.items():
        property_data[field] = extract_field(soup_detail, tag, css_class, aria_label)
    return property_data


all_properties = []
pages_processed = 0  # index pages that were fetched and fully walked

# Scrape index pages FIRST_PAGE..LAST_PAGE and every listing linked from them
for page in range(FIRST_PAGE, LAST_PAGE + 1):
    try:
        url = f'{BASE_URL}/Homes/Lahore-1-{page}.html'
        print(f"Scraping page {page}: {url}")

        webpage = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        webpage.raise_for_status()

        soup = BeautifulSoup(webpage.content, 'html.parser')

        # Get all listings from the page
        main = soup.find_all('li', class_='a37d52f0')
        print(f"Found {len(main)} listings on page {page}")

        # Process each listing. enumerate() gives the true position on the
        # page; list.index() would rescan the list for every listing and
        # report the first of any two tags that compare equal.
        for position, item in enumerate(main, start=1):
            link_tag = item.find("a", class_="d870ae17")
            if not (link_tag and link_tag.has_attr('href')):
                continue
            listing_url = BASE_URL + link_tag["href"]

            # Add delay between listing requests
            time.sleep(random.uniform(1, 2))

            print(f"Scraping listing: {listing_url}")

            try:
                property_data = scrape_listing(listing_url)
                print(f"✓ Page {page}, Listing {position}/{len(main)}: {property_data['title']}")
            except Exception as e:
                print(f"✗ Page {page}, Listing {position}/{len(main)}: Failed - {e}")
                # Keep the URL so the listing can be retried later. Fields come
                # first and 'error' last so that the column order of `df` does
                # not depend on whether the first row happened to fail.
                property_data = {'url': listing_url}
                for field in DETAIL_FIELDS:
                    property_data[field] = NOT_AVAILABLE
                property_data['error'] = str(e)
            all_properties.append(property_data)

        pages_processed += 1

    except Exception as e:
        print(f"Failed to scrape page {page}: {e}")
        continue

# Build the DataFrame unconditionally so later cells never see a missing or
# stale `df`. Nothing is written to disk here; saving happens in a later cell.
df = pd.DataFrame(all_properties)
if all_properties:
    print(f"Collected {len(all_properties)} properties into df (not yet written to disk)")
else:
    print("No properties were scraped.")

# Display summary
print("\n=== SCRAPING COMPLETE ===")
print(f"Total pages processed: {pages_processed} of {LAST_PAGE - FIRST_PAGE + 1}")
print(f"Total properties collected: {len(all_properties)}")



Scraping page 401: https://www.zameen.com/Homes/Lahore-1-401.html
Found 25 listings on page 401
Scraping listing: https://www.zameen.com/Property/askari_10_askari_10_-_sector_f_spanish_style_5-bed_brigadier_house_for_sale_askari_10_sector_f-51901093-10542-1.html
✓ Page 401, Listing 1/25: Spanish Style 5-Bed Brigadier House For Sale Askari 10, Sector F
Scraping listing: https://www.zameen.com/Property/askari_11_askari_11_-_sector_d_open_view_by_the_park_gem_ideal_live-in_or_investment_apartment_must_see_opportunity-49897654-21437-1.html
✓ Page 401, Listing 2/25: Open View By The Park Gem: Ideal Live-In Or Investment Apartment Must See Opportunity
Scraping listing: https://www.zameen.com/Property/askari_11_askari_11_-_sector_b_apartments_exclusive_near_mosque_park_ground_floor_apartment_for_sale_in_prime_location_call_now-52160704-15703-1.html
✓ Page 401, Listing 3/25: Exclusive Near Mosque & Park Ground Floor Apartment For Sale In Prime Location Call Now
Scraping listing: https://www.za

In [7]:
# Rebuild the DataFrame from the scraped records (one row per dict in all_properties).
# NOTE(review): this cell's execution count (7) is lower than the scraping cell's (20);
# re-run the notebook top to bottom so df reflects the latest scrape.
df = pd.DataFrame(data=all_properties)

In [21]:
# Preview the first five scraped listings.
df.head(n=5)

Unnamed: 0,url,price,title,location,type,area,purpose,baths,beds,description
0,https://www.zameen.com/Property/askari_10_aska...,9.5 Crore,Spanish Style 5-Bed Brigadier House For Sale A...,"Askari, Lahore, Punjab",House,17 Marla,For Sale,6,5,Saqib Real Estate presents stunning 5-bedroom ...
1,https://www.zameen.com/Property/askari_11_aska...,3.35 Crore,Open View By The Park Gem: Ideal Live-In Or In...,"Askari, Lahore, Punjab",Flat,10 Marla,For Sale,3,3,Park-Facing 10 Marla Apartment for Sale on 8th...
2,https://www.zameen.com/Property/askari_11_aska...,3.5 Crore,Exclusive Near Mosque & Park Ground Floor Apar...,"Askari, Lahore, Punjab",Flat,12 Marla,For Sale,5,4,Saqib Real Estate Presents a Modern Marvel: Ex...
3,https://www.zameen.com/Property/park_view_city...,2.25 Crore,5 Marla House For Sale In Executive Block Park...,"Park View City, Lahore, Punjab",House,5 Marla,For Sale,4,4,Discover The Life Of Luxury Living With This S...
4,https://www.zameen.com/Property/al_kabir_town_...,1.3 Crore,3 Marla Brand New Luxury House Available In Al...,"Raiwind Road, Lahore, Punjab",House,3 Marla,For Sale,4,3,3 Marla Brand New Luxury House Available In Al...


In [22]:
# Persist the scraped listings. index=False keeps the meaningless RangeIndex out
# of the file; otherwise it is written as a nameless first column that comes
# back as "Unnamed: 0" when the CSV is read again.
df.to_csv("401-420 pages.csv", index=False)

In [23]:
# (rows, columns): one row per record appended to all_properties,
# one column per distinct key stored in those records.
df.shape

(500, 10)

In [24]:
# Count missing values per column.
# Caveat: the scraper stores the string "N/A" for fields it could not find, and
# that is not a null, so zeros here do not prove every field was extracted.
df.isna().sum()

url            0
price          0
title          0
location       0
type           0
area           0
purpose        0
baths          0
beds           0
description    0
dtype: int64