In [1]:
import re
import time
from pathlib import Path
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Sanity marker: this line is only reached when every import above resolved.
print("All imports successful")

All imports successful


In [2]:
# Configure and launch the single Chrome session reused by every scraping cell.
options = webdriver.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--window-size=1920,1080')
options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

# Register the navigator.webdriver override through the DevTools protocol so it
# is evaluated at the start of every new document. Running it once with
# execute_script only patches the blank start page and is lost on the first
# driver.get().
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)
print("Browser launched")

Browser launched


In [4]:
_P24_BASE_URL = 'https://www.property24.co.ke'

# Conversion factor applied to the number found in the size element.
# NOTE(review): assumes the site reports floor area in square metres - confirm.
_SQM_TO_SQFT = 10.764

# Checked in this order; the first keyword contained in the title wins.
_PROPERTY_TYPE_KEYWORDS = (
    'Apartment', 'House', 'Townhouse', 'Maisonette', 'Bungalow', 'Studio', 'Land',
)


def _span_text(tile, css_class):
    """Return the stripped text of the first <span> with `css_class`, or None."""
    tag = tile.find('span', class_=css_class)
    return tag.text.strip() if tag else None


def _classify_property_type(title):
    """Map a listing title to the first known type keyword it contains.

    Returns None for a missing/empty title and the title itself when no
    keyword matches.
    """
    if not title:
        return None
    for keyword in _PROPERTY_TYPE_KEYWORDS:
        if keyword in title:
            return keyword
    return title


def _parse_feature_counts(tile):
    """Return (bedrooms, bathrooms, parking) read from the feature spans.

    Each value is an int, or None when the feature is absent or its text is
    not a plain non-negative integer.
    """
    counts = {'bedroom': None, 'bathroom': None, 'parking': None}
    for span in tile.find_all('span', class_='p24_featureDetails'):
        title_attr = span.get('title', '').lower()
        number_span = span.find('span')
        value = number_span.text.strip() if number_span else None
        for feature in counts:
            if feature in title_attr:
                counts[feature] = int(value) if value and value.isdigit() else None
                break
    return counts['bedroom'], counts['bathroom'], counts['parking']


def _parse_size_sqft(tile):
    """Return the listed size converted with _SQM_TO_SQFT and rounded, or None."""
    size_tag = tile.find('span', class_='p24_size')
    size_span = size_tag.find('span') if size_tag else None
    if size_span is None:
        return None
    size_num = re.sub(r'[^\d.]', '', size_span.text.strip())
    if not size_num:
        return None
    try:
        return round(float(size_num) * _SQM_TO_SQFT)
    except ValueError:
        # Text such as "1.2.3" is not a number; treat the size as missing
        # rather than discarding the whole listing.
        return None


def _parse_listing_url(tile):
    """Return the absolute URL of the tile's first link, or None."""
    link_tag = tile.find('a')
    href = link_tag.get('href') if link_tag else None
    # urljoin leaves absolute hrefs untouched and resolves relative ones
    # against the site root, unlike plain string concatenation.
    return urljoin(_P24_BASE_URL, href) if href else None


def _parse_tile(tile, listing_date):
    """Turn one listing tile into a record dict.

    Returns None when the tile has no usable price or no location, since
    such rows are not kept.
    """
    price_text = _span_text(tile, 'p24_price')
    price_digits = re.sub(r'[^\d]', '', price_text) if price_text else None
    price_kes = int(price_digits) if price_digits else None

    location = _span_text(tile, 'p24_location')
    if not (price_kes and location):
        return None

    beds, baths, parking = _parse_feature_counts(tile)
    return {
        'source': 'property24',
        'location': location,
        'address': _span_text(tile, 'p24_address'),
        'property_type': _classify_property_type(_span_text(tile, 'p24_propertyTitle')),
        'bedrooms': beds,
        'bathrooms': baths,
        'parking': parking,
        'size_sqft': _parse_size_sqft(tile),
        'price_kes': price_kes,
        'listing_url': _parse_listing_url(tile),
        'listing_date': listing_date,
    }


def scrape_property24_page(url, timeout=15):
    """Load one search-results page and return its listings as a list of dicts.

    Uses the module-level `driver` (a running Selenium WebDriver) to open
    `url`, waits until at least one `p24_regularTile` element is present,
    then parses the rendered HTML with BeautifulSoup.

    Args:
        url: Address of the results page to load.
        timeout: Seconds to wait for the first listing tile to appear.

    Returns:
        A list of dicts with the keys source, location, address,
        property_type, bedrooms, bathrooms, parking, size_sqft, price_kes,
        listing_url and listing_date. Tiles without a price or a location are
        left out. An empty list is returned when the tiles never appear.
    """
    listings = []

    driver.get(url)

    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, "p24_regularTile"))
        )
    except TimeoutException:
        print(f"   Page timed out: {url}")
        return listings
    except WebDriverException as exc:
        # Keep the run going, but do not mislabel browser failures as timeouts
        # and do not swallow KeyboardInterrupt the way a bare except would.
        print(f"   Page failed ({type(exc).__name__}): {url}")
        return listings

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Stamp every row of this page with the same date.
    listing_date = pd.Timestamp.today().strftime('%Y-%m-%d')

    for tile in soup.find_all('div', class_='p24_regularTile'):
        try:
            record = _parse_tile(tile, listing_date)
        except Exception as e:
            # Best effort: one malformed tile must not abort the page.
            print(f"  Skipped one listing: {e}")
            continue
        if record is not None:
            listings.append(record)

    return listings

print("Scraping function defined")

print("Scraping function defined")

Scraping function defined


In [5]:
# Results URL template; the placeholder takes the 1-based page number.
base_url = "https://www.property24.co.ke/property-for-sale-in-nairobi-c1890?Page={}"

# Tunables for the crawl, named so they can be changed in one place.
FIRST_PAGE = 1
LAST_PAGE = 20
PAGE_DELAY_SECONDS = 3  # pause between consecutive page requests

all_listings = []

for page_num in range(FIRST_PAGE, LAST_PAGE + 1):

    url = base_url.format(page_num)
    print(f"Scraping page {page_num}...", end=' ')

    page_listings = scrape_property24_page(url)
    all_listings.extend(page_listings)

    print(f"Got {len(page_listings)} | Total so far: {len(all_listings)}")

    # No request follows the last page, so there is nothing to wait for.
    if page_num < LAST_PAGE:
        time.sleep(PAGE_DELAY_SECONDS)

print(f"\n Done! Total listings collected: {len(all_listings)}")

Scraping page 1... Got 21 | Total so far: 21
Scraping page 2... Got 21 | Total so far: 42
Scraping page 3... Got 21 | Total so far: 63
Scraping page 4... Got 21 | Total so far: 84
Scraping page 5... Got 21 | Total so far: 105
Scraping page 6... Got 21 | Total so far: 126
Scraping page 7... Got 21 | Total so far: 147
Scraping page 8... Got 21 | Total so far: 168
Scraping page 9... Got 21 | Total so far: 189
Scraping page 10... Got 21 | Total so far: 210
Scraping page 11... Got 21 | Total so far: 231
Scraping page 12... Got 21 | Total so far: 252
Scraping page 13... Got 21 | Total so far: 273
Scraping page 14... Got 21 | Total so far: 294
Scraping page 15... Got 21 | Total so far: 315
Scraping page 16... Got 21 | Total so far: 336
Scraping page 17... Got 21 | Total so far: 357
Scraping page 18... Got 21 | Total so far: 378
Scraping page 19... Got 21 | Total so far: 399
Scraping page 20... Got 21 | Total so far: 420

 Done! Total listings collected: 420


In [6]:
# Build one table from the scraped records and summarise its completeness.
df = pd.DataFrame(all_listings)
missing_per_column = df.isna().sum()

print(f"Shape: {df.shape}")
print("\nMissing values:", missing_per_column, sep="\n")
print("\nSample data:")
df.head(5)

Shape: (420, 11)

Missing values:
source            0
location          0
address           0
property_type     0
bedrooms          9
bathrooms        43
parking           1
size_sqft         0
price_kes         0
listing_url       0
listing_date      0
dtype: int64

Sample data:


Unnamed: 0,source,location,address,property_type,bedrooms,bathrooms,parking,size_sqft,price_kes,listing_url,listing_date
0,property24,Westlands,"Westlands Rd Nairobi, Westlands, Nairobi",Apartment,1.0,1.0,1.0,732,8940000,https://www.property24.co.ke/1-bedroom-apartme...,2026-02-23
1,property24,Westlands,"Westlands Rd Nairobi, Westlands, Nairobi",Apartment,1.0,1.0,1.0,732,8940000,https://www.property24.co.ke/1-bedroom-apartme...,2026-02-23
2,property24,Syokimau,"Mombasa Road Nairobi, Syokimau, Nairobi",Apartment,2.0,2.0,1.0,990,7500000,https://www.property24.co.ke/2-bedroom-apartme...,2026-02-23
3,property24,Westlands,"Rhapta Terraces Rhapta Road, Westlands, Nairobi",Apartment,1.0,1.0,1.0,753,9200000,https://www.property24.co.ke/1-bedroom-apartme...,2026-02-23
4,property24,Westlands,"Rhapta Terraces Rhapta Road, Westlands, Nairobi",Apartment,1.0,1.0,1.0,721,9000000,https://www.property24.co.ke/1-bedroom-apartme...,2026-02-23


In [8]:
# Persist the scraped table. The target folder is created first so a fresh
# checkout without a data/ directory does not lose the scrape at the last step.
raw_csv_path = Path('../data/raw_listings.csv')
raw_csv_path.parent.mkdir(parents=True, exist_ok=True)

df.to_csv(raw_csv_path, index=False)
print(f"Saved {len(df)} listings to data/raw_listings.csv")

Saved 420 listings to data/raw_listings.csv


In [9]:
import pandas as pd

# Read the saved CSV back and print a quick profile of what it contains.
df = pd.read_csv('../data/raw_listings.csv')
price_column = df['price_kes']

print("File loaded successfully")
print(f"Total rows: {df.shape[0]}")
print(f"Total columns: {list(df.columns)}")

print("\nPrice range:")
for stat_label, stat_value in (
    ("Min", price_column.min()),
    ("Max", price_column.max()),
    ("Median", price_column.median()),
):
    print(f"  {stat_label}: KES {stat_value:,.0f}")

print("\nTop 10 locations:")
print(df['location'].value_counts().head(10))

print("\nMissing values per column:")
print(df.isna().sum())

✓ File loaded successfully
Total rows: 420
Total columns: ['source', 'location', 'address', 'property_type', 'bedrooms', 'bathrooms', 'parking', 'size_sqft', 'price_kes', 'listing_url', 'listing_date']

Price range:
  Min: KES 4,310,000
  Max: KES 220,000,000
  Median: KES 12,922,000

Top 10 locations:
location
Westlands       235
Kilimani         77
Kileleshwa       54
Syokimau         15
Lavington        10
Riverside         8
Runda             6
Mirema            3
Kiambu Road       3
Lower Kabete      2
Name: count, dtype: int64

Missing values per column:
source            0
location          0
address           0
property_type     0
bedrooms          9
bathrooms        43
parking           1
size_sqft         0
price_kes         0
listing_url       0
listing_date      0
dtype: int64
