In [2]:
pip install requests pandas beautifulsoup4 sqlalchemy lxml


Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
import requests # Sends https requests
from bs4 import BeautifulSoup
from sqlalchemy import create_engine


In [None]:


def _parse_listing(listing):
    """Extract the display fields of one ``listing-card`` element as a dict.

    Title, price and location fall back to 'No title' / 'No price' /
    'No location' when their tag is absent; bedrooms, bathrooms and size
    fall back to 'N/A'.
    """
    def text_of(tag, default):
        return tag.get_text(strip=True) if tag else default

    title = text_of(listing.find('h2'), 'No title')

    # NOTE(review): a multi-word class_ string is matched against the exact
    # value of the class attribute, so these lookups break if the site
    # reorders or changes its utility classes -- confirm against live markup.
    price = text_of(
        listing.find('p', class_='text-xl font-bold leading-7 text-grey-900'),
        'No price',
    )
    location = text_of(
        listing.find('p', class_='ml-1 truncate text-sm font-normal capitalize text-grey-650'),
        'No location',
    )

    # Bedrooms, bathrooms and size are rendered as slides of a scrollable list.
    bedrooms = bathrooms = size = 'N/A'
    swiper_div = listing.find('div', class_='scrollable-list')
    if swiper_div:
        for slide in swiper_div.find_all('div', class_='swiper-slide'):
            text = slide.get_text(strip=True)
            if 'Bedroom' in text:
                bedrooms = text
            elif 'Bathroom' in text:
                bathrooms = text
            elif 'm²' in text:
                size = text

    return {
        'Title': title,
        'Price': price,
        'Location': location,
        'Bedrooms': bedrooms,
        'Bathrooms': bathrooms,
        'Size': size,
    }


def scrape_pages(start_page, end_page, timeout=30):
    """Scrape house-for-sale listings from a range of result pages.

    Parameters
    ----------
    start_page, end_page : int
        First and last page number to fetch (both inclusive). If
        ``start_page > end_page`` no request is made.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up on that page.

    Returns
    -------
    pandas.DataFrame
        One row per listing with the columns Title, Price, Location,
        Bedrooms, Bathrooms and Size (all strings). The columns are present
        even when no listing was scraped.

    Notes
    -----
    Scraping is best-effort: a page that cannot be fetched (network error,
    timeout or non-200 status) is reported on stdout and skipped, so the
    listings collected from the other pages are still returned.
    """
    base_url = 'https://www.buyrentkenya.com/houses-for-sale'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    columns = ['Title', 'Price', 'Location', 'Bedrooms', 'Bathrooms', 'Size']

    properties = []

    for page_num in range(start_page, end_page + 1):
        url = f'{base_url}?page={page_num}'
        print(f"Scraping page {page_num}: {url}")

        try:
            # Without a timeout a single stalled connection would hang the run.
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to retrieve the page. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        listings = soup.find_all('div', class_='listing-card')
        properties.extend(_parse_listing(listing) for listing in listings)

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(properties, columns=columns)


In [4]:
# Scrape result pages 2 through 4 (both inclusive) into a single DataFrame.
df_all_pages = scrape_pages(start_page=2, end_page=4)

Scraping page 2: https://www.buyrentkenya.com/houses-for-sale?page=2
Scraping page 3: https://www.buyrentkenya.com/houses-for-sale?page=3
Scraping page 4: https://www.buyrentkenya.com/houses-for-sale?page=4


In [9]:
# Preview the first scraped rows to sanity-check the extracted columns.
df_all_pages.head()

Unnamed: 0,Title,Price,Location,Bedrooms,Bathrooms,Size
0,4 Bed Villa with En Suite in Westlands Area,"KSh 96,000,000","Westlands Area, Westlands",4 Bedrooms,,
1,5 Bed Townhouse with En Suite in Lavington,"KSh 130,000,000",Lavington,5 Bedrooms,6 Bathrooms,
2,4 Bed House with En Suite at Kiambu Road.,"KSh 20,000,000","00502, New Runda, kiambu road., Runda, Westlands",4 Bedrooms,5 Bathrooms,
3,3 Bed House with En Suite at Kangundo Road,"KSh 8,500,000","Kangundo Road, Joska, Kamulu Joska Malaa",3 Bedrooms,2 Bathrooms,130 m²
4,5 Bed House with En Suite in Runda,"KSh 240,000,000","Runda, Westlands",5 Bedrooms,7 Bathrooms,


In [7]:
pip install psycopg2-binary


Note: you may need to restart the kernel to use updated packages.


In [6]:
def _database_url():
    """Build the PostgreSQL connection URL without storing a password in the notebook.

    Resolution order:
    1. ``DATABASE_URL`` from the environment, used verbatim when set.
    2. Otherwise the local ``properties`` database as user ``postgres``, with
       the password taken from ``PGPASSWORD`` or, failing that, prompted for
       interactively.
    """
    url = os.environ.get("DATABASE_URL")
    if url:
        return url
    password = os.environ.get("PGPASSWORD") or getpass("Password for PostgreSQL user 'postgres': ")
    # quote_plus escapes characters such as '@' or '/' that would otherwise
    # be misread as URL delimiters.
    return f"postgresql://postgres:{quote_plus(password)}@localhost:5432/properties"


engine = create_engine(_database_url())

In [7]:
# Persist the scraped listings to the public.housesforsale table, replacing
# any table of that name that already exists.
df_all_pages.to_sql(
    name='housesforsale',
    con=engine,
    schema="public",
    if_exists="replace",
)


75