In [18]:
import requests
import time
import pandas as pd
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [19]:
def scrape_pages(start_page, end_page):
    base_url = 'https://www.buyrentkenya.com/houses-for-sale'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    properties = []

    for page_num in range(start_page, end_page + 1):
        url = f'{base_url}?page={page_num}'
        print(f"Scraping page {page_num}: {url}")
        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        listings = soup.find_all('div', class_='listing-card')

        for listing in listings:

            # --- Extract basic info from listing card ---
            title_tag = listing.find('h2')
            title = title_tag.get_text(strip=True) if title_tag else 'No title'

            price_tag = listing.find('a', class_='pointer-events-none z-10 no-underline')
            price = price_tag.get_text(strip=True) if price_tag else 'No price'

            location_tag = listing.find('p', class_='w-full truncate font-normal capitalize')
            location = location_tag.get_text(strip=True) if location_tag else 'No location'

            
            # --- extract each property link ---
            property_tag = listing.find('a', href=True)
            property_url = urljoin(base_url, property_tag['href']) if property_tag else None

            if not property_url:
                continue


            # Swiper slides extraction (bedrooms, bathrooms, size)
            swiper_div = listing.find('div', class_='scrollable-list')

            bedrooms = bathrooms = size = 'N/A'

            if swiper_div:
                slides = swiper_div.find_all('div', class_='swiper-slide')
                for slide in slides:
                    text = slide.get_text(strip=True)
                    if 'Bedroom' in text:
                        bedrooms = text
                    elif 'Bathroom' in text:
                        bathrooms = text
                    elif 'mÂ²' in text or 'sq' in text.lower():
                        size = text

    # ===================================================
            # --- Visit the property detail page ---
            detail_response = requests.get(property_url, headers=headers)
            if detail_response.status_code != 200:
                print(f"Failed to load property page: {property_url}")
                continue


            detail_soup = BeautifulSoup(detail_response.content, "html.parser")

            created_at = "N/A"

            created_tag = detail_soup.find(
                string=lambda x: x and "Created At:" in x
            )

            if created_tag:
                created_at = created_tag.strip().replace("Created At:", "").strip()



            # --- Extract utilities and nearby facilities ---
            utilities =  []
            nearby = []

            sections = detail_soup.find_all("div", class_="px-3 py-3 even:bg-gray-50")
            for section in sections:
                title_span = section.find("span", class_="font-semibold")
                if not title_span:
                    continue

                section_name = title_span.get_text(strip=True).lower()
                items_div = section.find("div", class_="flex flex-wrap gap-3")
                if not items_div:
                    continue

                items = [span.get_text(strip=True) for span in items_div.find_all("span")]
                if "internal features" in section_name or "external features" in section_name:
                    utilities.extend(items)
                elif "nearby" in section_name:
                    nearby.extend(items)

    # ===================================================
            # --- Append property data ---
            properties.append({
                'Title': title,
                'Price': price,
                'Location': location,
                'Bedrooms': bedrooms,
                'Bathrooms': bathrooms,
                # 'Size': size_swiper,
                'Amenities': utilities,
                'Surroundings': nearby,
                'Created At': created_at
            })

            # --- Polite delay ---
            time.sleep(1)

        # Optional: stop once 500+ listings collected
        if len(properties) >= 500:
            print("Reached 500+ listings, stopping scrape.")
            break

    # Convert to DataFrame
    df = pd.DataFrame(properties)
    return df


In [26]:
df = scrape_pages(start_page=1, end_page=4)

Scraping page 1: https://www.buyrentkenya.com/houses-for-sale?page=1
Failed to retrieve the page. Status code: 404
Scraping page 2: https://www.buyrentkenya.com/houses-for-sale?page=2
Scraping page 3: https://www.buyrentkenya.com/houses-for-sale?page=3
Scraping page 4: https://www.buyrentkenya.com/houses-for-sale?page=4


In [27]:
df.head()

Unnamed: 0,Title,Price,Location,Bedrooms,Bathrooms,Amenities,Surroundings,Created At
0,5 Bed Townhouse with En Suite in Nyali Area,"KSh 29,500,000","Nyali Area, Nyali",5 Bedrooms,5 Bathrooms,"[Aircon, Alarm, Backup Generator, En Suite, Fi...","[Bus Stop, Golf Course, Hospital, Scenic View,...",09 February 2026
1,6 Bed Townhouse with En Suite in Lavington,"KSh 160,000,000",Lavington,6 Bedrooms,7 Bathrooms,"[Alarm, Backup Generator, En Suite, Fibre Inte...","[Bus Stop, Shopping Centre, Golf Course, Hospi...",06 February 2026
2,4 Bed House with En Suite in Runda,"KSh 155,040,000","Runda, Westlands",4 Bedrooms,5 Bathrooms,"[Alarm, Backup Generator, En Suite, Walk In Cl...","[Bus Stop, Shopping Centre, Hospital, Scenic V...",21 October 2025
3,5 Bed House with En Suite in Westlands Area,"KSh 98,000,000","Westlands Area, Westlands",5 Bedrooms,5 Bathrooms,"[Alarm, Backup Generator, En Suite, Fibre Inte...","[Bus Stop, Shopping Centre, Hospital, School]",17 February 2026
4,6 Bed Villa with En Suite at Loiyangalani Road,"KSh 85,000,000",Lavington,6 Bedrooms,7 Bathrooms,"[Aircon, Alarm, Backup Generator, En Suite, Fi...","[Bus Stop, Hospital, Scenic View, School, Shop...",17 February 2026


In [31]:

# Flatten lists to comma-separated strings
df['Amenities'] = df['Amenities'].apply(lambda x: ", ".join(x) if isinstance(x, list) else x)
df['Surroundings'] = df['Surroundings'].apply(lambda x: ", ".join(x) if isinstance(x, list) else x)

# Show the number of listings extracted
num_listings = len(df)
print(f"Number of listings extracted: {num_listings}")

# Save to CSV (replaces existing file if it exists)
output_file = "properties.csv"
df.to_csv(output_file, index=False)
print(f"Data saved to {output_file}")


Number of listings extracted: 75
Data saved to properties.csv


In [29]:
# data dictionary 
# convert DataFrame to a list of dictionaries
data_dict = df.to_dict(orient="records")

# Example: print first listing
print(data_dict[0])


{'Title': '5 Bed Townhouse with En Suite in Nyali Area', 'Price': 'KSh 29,500,000', 'Location': 'Nyali Area, Nyali', 'Bedrooms': '5 Bedrooms', 'Bathrooms': '5 Bathrooms', 'Amenities': 'Aircon, Alarm, Backup Generator, En Suite, Fibre Internet, Walk In Closet, Balcony, CCTV, Electric Fence, Borehole, Garden, Gym, Parking, Swimming Pool, Gated Community, Kids Play Area', 'Surroundings': 'Bus Stop, Golf Course, Hospital, Scenic View, School, Shopping Centre', 'Created At': '09 February 2026'}


In [None]:
# data dictionary as a list of dictionaries
data_dictionary = [
    {"Column Name": "Title", "Description": "Name of property listing", "Data Type": "String", "Example": "5 Bed Townhouse with En Suite in Nyali Area"},
    {"Column Name": "Price", "Description": "Listed price", "Data Type": "String / Numeric", "Example": "KSh 29,500,000"},
    {"Column Name": "Location", "Description": "Property location", "Data Type": "String", "Example": "Nyali Area, Nyali"},
    {"Column Name": "Bedrooms", "Description": "Number of bedrooms", "Data Type": "String / Int", "Example": "5 Bedrooms"},
    {"Column Name": "Bathrooms", "Description": "Number of bathrooms", "Data Type": "String / Int", "Example": "5 Bathrooms"},
    {"Column Name": "Amenities", "Description": "Internal & external features", "Data Type": "String (comma-separated)", "Example": "Aircon, Alarm, Backup Generator"},
    {"Column Name": "Surroundings", "Description": "Nearby facilities / landmarks", "Data Type": "String (comma-separated)", "Example": "Bus Stop, Golf Course, Hospital"},
    {"Column Name": "Created At", "Description": "Date listing was created", "Data Type": "Date / String", "Example": "09 February 2026"},
]

# Convert to DataFrame
ddf = pd.DataFrame(data_dictionary)

# Print to console (optional)
print(ddf)

# Save as JSON (optional)
ddf.to_json("data_dictionary.json", orient="records", indent=2)

print("Data dictionary saved as JSON.")



    Column Name                    Description                 Data Type  \
0         Title       Name of property listing                    String   
1         Price                   Listed price          String / Numeric   
2      Location              Property location                    String   
3      Bedrooms             Number of bedrooms              String / Int   
4     Bathrooms            Number of bathrooms              String / Int   
5     Amenities   Internal & external features  String (comma-separated)   
6  Surroundings  Nearby facilities / landmarks  String (comma-separated)   
7    Created At       Date listing was created             Date / String   

                                       Example  
0  5 Bed Townhouse with En Suite in Nyali Area  
1                               KSh 29,500,000  
2                            Nyali Area, Nyali  
3                                   5 Bedrooms  
4                                  5 Bathrooms  
5              Aircon