In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_pages(start_page, end_page, timeout=10):
    """Scrape house-for-sale listings from buyrentkenya.com into a DataFrame.

    Pages ``start_page`` through ``end_page`` (inclusive) are requested one at
    a time. A page that cannot be fetched (network error, timeout, or a
    non-200 status) is reported with ``print`` and skipped, so one bad page
    does not discard the listings already collected.

    Parameters
    ----------
    start_page : int
        First page number to request.
    end_page : int
        Last page number to request (inclusive).
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up. Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per listing with the columns ``Title``, ``Price``,
        ``Location``, ``Bedrooms``, ``Bathrooms`` and ``Size``. The columns
        are present even when no listing was collected.
    """
    base_url = 'https://www.buyrentkenya.com/houses-for-sale'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    columns = ['Title', 'Price', 'Location', 'Bedrooms', 'Bathrooms', 'Size']

    properties = []

    # NOTE(review): a recorded run of this notebook got a 404 for "?page=1";
    # presumably the first page is served from the bare URL - TODO confirm.
    for page_num in range(start_page, end_page + 1):
        url = f'{base_url}?page={page_num}'
        print(f"Scraping page {page_num}: {url}")

        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Same best-effort policy as for a bad status code: report and move on.
            print(f"Failed to retrieve the page. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        for listing in soup.find_all('div', class_='listing-card'):
            properties.append(_parse_listing(listing))

    # Passing the column list keeps the frame's shape stable when nothing was scraped.
    return pd.DataFrame(properties, columns=columns)


def _parse_listing(listing):
    """Extract the fields of a single listing card into a plain dict."""
    title_tag = listing.find('h2')
    price_tag = listing.find('p', class_='text-xl font-bold leading-7 text-grey-900')
    location_tag = listing.find(
        'p', class_='ml-1 truncate text-sm font-normal capitalize text-grey-650'
    )

    # Bedrooms, bathrooms and size are rendered as slides inside one container.
    bedrooms = bathrooms = size = 'N/A'
    swiper_div = listing.find('div', class_='scrollable-list')
    if swiper_div:
        # 'm\u00b2' is the square-metre marker; 'm\u00c2\u00b2' is the same
        # text after a UTF-8 -> Latin-1 mis-decode and is kept so both forms match.
        size_markers = ('m\u00b2', 'm\u00c2\u00b2')
        for slide in swiper_div.find_all('div', class_='swiper-slide'):
            text = slide.get_text(strip=True)
            if 'Bedroom' in text:
                bedrooms = text
            elif 'Bathroom' in text:
                bathrooms = text
            elif any(marker in text for marker in size_markers):
                size = text

    return {
        'Title': title_tag.get_text(strip=True) if title_tag else 'No title',
        'Price': price_tag.get_text(strip=True) if price_tag else 'No price',
        'Location': location_tag.get_text(strip=True) if location_tag else 'No location',
        'Bedrooms': bedrooms,
        'Bathrooms': bathrooms,
        'Size': size,
    }

In [2]:
# Scrape result pages 1 through 4 (both ends included) into one DataFrame.
df_all_pages = scrape_pages(start_page=1, end_page=4)

Scraping page 1: https://www.buyrentkenya.com/houses-for-sale?page=1
Failed to retrieve the page. Status code: 404
Scraping page 2: https://www.buyrentkenya.com/houses-for-sale?page=2
Scraping page 3: https://www.buyrentkenya.com/houses-for-sale?page=3
Scraping page 4: https://www.buyrentkenya.com/houses-for-sale?page=4


In [3]:
# Preview the first rows of the scraped listings.
df_all_pages.head()

Unnamed: 0,Title,Price,Location,Bedrooms,Bathrooms,Size
0,5 Bed House in Kyuna,"KSh 90,000,000","Kyuna, Westlands",5 Bedrooms,,
1,4 Bed House with En Suite at Ruiru,"KSh 15,000,000",Ruiru,4 Bedrooms,6 Bathrooms,
2,6 Bed House with En Suite in Garden Estate,"KSh 130,000,000","Garden Estate, Roysambu",6 Bedrooms,4 Bathrooms,
3,4 Bed Townhouse with En Suite in South B,"KSh 20,000,000",South B,4 Bedrooms,5 Bathrooms,
4,4 Bed Townhouse with Swimming Pool in Kiambu Road,"KSh 45,000,000",Kiambu Road,4 Bedrooms,5 Bathrooms,


In [None]:
from sqlalchemy import create_engine
import pandas as pd
import seaborn as sns
import datetime
import os
from dotenv import load_dotenv
# Load variables from a .env file into the environment so the os.getenv
# lookups for the database settings can find them.
load_dotenv()

In [None]:
# Read the PostgreSQL connection settings from the environment and build the
# SQLAlchemy engine used to write and read the scraped listings.
from urllib.parse import quote_plus

database = os.getenv('database')
user = os.getenv('user')
password = os.getenv('password')
host = os.getenv('host')
port = os.getenv('port')

# Fail early with a clear message: an unset variable would otherwise be
# interpolated into the URL as the literal string "None".
_missing_settings = [
    name
    for name, value in (
        ('database', database),
        ('user', user),
        ('password', password),
        ('host', host),
        ('port', port),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing database settings in the environment: "
        + ", ".join(_missing_settings)
    )

# Format: postgresql://username:password@host:port/database
# The user name and password are percent-encoded so that characters such as
# '@', ':' or '/' in a credential cannot corrupt the URL.
engine = create_engine(
    f"postgresql://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{database}"
)

In [8]:
# SQL used to read the stored listings back from the houses schema.
query = ''' select * from houses."buy_rent"
'''

# Write the scraped listings to houses.buy_rent, replacing any existing table;
# the DataFrame index is not stored.
df_all_pages.to_sql(
    name="buy_rent",
    con=engine,
    schema="houses",
    if_exists="replace",
    index=False,
)

75

In [9]:
# Read the stored table back from PostgreSQL and preview the first rows.
df = pd.read_sql(sql=query, con=engine)
df.head()

Unnamed: 0,Title,Price,Location,Bedrooms,Bathrooms,Size
0,5 Bed House in Kyuna,"KSh 90,000,000","Kyuna, Westlands",5 Bedrooms,,
1,4 Bed House with En Suite at Ruiru,"KSh 15,000,000",Ruiru,4 Bedrooms,6 Bathrooms,
2,6 Bed House with En Suite in Garden Estate,"KSh 130,000,000","Garden Estate, Roysambu",6 Bedrooms,4 Bathrooms,
3,4 Bed Townhouse with En Suite in South B,"KSh 20,000,000",South B,4 Bedrooms,5 Bathrooms,
4,4 Bed Townhouse with Swimming Pool in Kiambu Road,"KSh 45,000,000",Kiambu Road,4 Bedrooms,5 Bathrooms,


In [10]:
# Count missing values in each column.
df.isna().sum()


Title        0
Price        0
Location     0
Bedrooms     0
Bathrooms    0
Size         0
dtype: int64

In [11]:
# Inspect the column dtypes of the frame loaded from the database; the
# recorded output shows every column as object.
df.dtypes

Title        object
Price        object
Location     object
Bedrooms     object
Bathrooms    object
Size         object
dtype: object