In [43]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
# Base URLs
base_url = 'https://www.buyrentkenya.com'

In [68]:
def fetch_page(property_type,payment_type, page):
    page_url = f'{base_url}/{property_type}-for-{payment_type}?page={page}'
    if page == 1:
        page_url = f'{base_url}/{property_type}-for-{payment_type}'
    page_response = requests.get(page_url)
    if page_response.status_code != 200:
        return [], False
    page_soup = BeautifulSoup(page_response.text, 'lxml')
    houses = page_soup.find_all('div', class_="flex flex-col justify-between px-5 py-4 md:w-3/5")
    page_data = []
    for house in houses:
        try:
            url = house.find('h2', class_='font-semibold md:hidden').find('a', class_='no-underline')['href']
            house_url = base_url + url
            house_url_response = requests.get(house_url)
            if house_url_response.status_code != 200:
                print(f"House: {house_url} request failed")
                continue
            house_url_soup = BeautifulSoup(house_url_response.text, 'lxml')
            # Extract information
            title = house.find('span', class_="relative top-[2px] hidden md:inline")
            title = title.text.strip() if title else None
            location = house.find('p', 'ml-1')
            location = location.text.strip() if location else None
            bedrooms = house.find(attrs={'data-cy': 'card-beds'})
            bedrooms = bedrooms.text.strip() if bedrooms else None
            bathrooms = house.find(attrs={'data-cy': 'card-bathrooms'})
            bathrooms = bathrooms.text.strip() if bathrooms else None
            size = house.find(attrs={'data-cy': 'card-area'})
            size = size.text.strip() if size else None
            date = house_url_soup.find('div', class_='flex justify-between py-2').find('span', class_='font-semibold')
            date = date.text.strip() if date else None
            amenities = []
            features = house_url_soup.find('section', class_='bg-highlight px-3 md:bg-white md:px-0').find_all('li', class_='flex')
            for feature in features:
                amenities.append(feature.text.strip('\n\n|'))
            amenities = sorted(amenities)
            price = house_url_soup.find('span', class_='block text-right text-xl font-semibold leading-7 md:text-xxl md:font-extrabold')
            price = price.text.strip() if price else None
            page_data.append({
                "Title": title,
                "Location": location,
                "Bedrooms": bedrooms,
                "Bathrooms": bathrooms,
                "Size": size,
                "Date": date,
                "Amenities": amenities,
                "Url": url,
                "Property Type": property_type,
                "Payment_type": payment_type,
                "Price": price
            })
        except Exception as e:
            print(f"Error processing house on page {page}: {e}")
    print(f'Page: {page} done')
    return page_data, True

In [61]:
rent_str = 'rent'
sale_str = 'sale'

houses_str = 'houses'
apartments_str = 'flats-apartments'
land_str = 'land'
commercial_property_str = 'commercial-property'
bedsitter_str = 'bedsitters'

In [69]:
# Start with Houses for sale
def create_dataset(property_str, payment_str):
  with ThreadPoolExecutor() as exe:
    page_state = True
    page = 1
    futures = []
    while page_state:
      future = exe.submit(fetch_page, property_str, payment_str, page)
      houses_for_sale, page_state = future.result()
      futures.extend(houses_for_sale)
      page += 1

  return futures

In [73]:
commercial_prop_for_sale = create_dataset(commercial_property_str, sale_str)
commercial_prop_for_sale_df = pd.DataFrame(commercial_prop_for_sale)

Page: 1 done
Error processing house on page 2: 'NoneType' object has no attribute 'find'
Page: 2 done
Page: 3 done
Page: 4 done
Page: 5 done
Error processing house on page 6: 'NoneType' object has no attribute 'find'
Page: 6 done
Page: 7 done
Page: 8 done
Page: 9 done
Page: 10 done
Page: 11 done
Page: 12 done
Page: 13 done
Page: 14 done
Page: 15 done
Page: 16 done
Page: 17 done
Page: 18 done
Page: 19 done
Page: 20 done
Page: 21 done
Page: 22 done


In [76]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [77]:
commercial_prop_for_sale_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/commercial_prop_for_sale.csv', index=False)

In [78]:
houses_for_sale = create_dataset(houses_str, sale_str)
houses_for_sale_df = pd.DataFrame(houses_for_sale)
houses_for_sale_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/houses_for_sale.csv', index=False)

Page: 1 done
Page: 2 done
Page: 3 done
Page: 4 done
Page: 5 done
Page: 6 done
Page: 7 done
Page: 8 done
Page: 9 done
Page: 10 done
Page: 11 done
Page: 12 done
Page: 13 done
Page: 14 done
Page: 15 done
Page: 16 done
Page: 17 done
Page: 18 done
Page: 19 done
Page: 20 done
Page: 21 done
Page: 22 done
Page: 23 done
Page: 24 done
Page: 25 done
Page: 26 done
Page: 27 done
Page: 28 done
Page: 29 done
Error processing house on page 30: 'NoneType' object has no attribute 'find'
Page: 30 done
Page: 31 done
Page: 32 done
Page: 33 done
Page: 34 done
Page: 35 done
Page: 36 done
Page: 37 done
Page: 38 done
Page: 39 done
Page: 40 done
Page: 41 done
Page: 42 done
Page: 43 done
Page: 44 done
Page: 45 done
Page: 46 done
Page: 47 done
Page: 48 done
Page: 49 done
Page: 50 done
Page: 51 done
Page: 52 done
Page: 53 done
Page: 54 done
Page: 55 done
Page: 56 done
Page: 57 done
Page: 58 done
Page: 59 done
Page: 60 done
Page: 61 done
Page: 62 done
Page: 63 done
Page: 64 done
Page: 65 done
Page: 66 done
Page: 67

In [79]:
apartments_for_sale = create_dataset(apartments_str, sale_str)
apartments_for_sale_df = pd.DataFrame(apartments_for_sale)
apartments_for_sale_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/apartments_for_sale.csv', index=False)

Page: 1 done
Page: 2 done
Page: 3 done
Page: 4 done
Page: 5 done
Page: 6 done
Page: 7 done
Page: 8 done
Page: 9 done
Page: 10 done
Page: 11 done
Page: 12 done
Page: 13 done
Page: 14 done
Page: 15 done
Page: 16 done
Error processing house on page 17: 'NoneType' object has no attribute 'find'
Page: 17 done
Page: 18 done
Page: 19 done
Page: 20 done
Page: 21 done
Page: 22 done
Page: 23 done
Page: 24 done
Page: 25 done
Page: 26 done
Page: 27 done
Page: 28 done
Page: 29 done
Page: 30 done
Page: 31 done
Page: 32 done
Page: 33 done
Page: 34 done
Page: 35 done
Page: 36 done
Page: 37 done
Page: 38 done
Page: 39 done
Error processing house on page 40: 'NoneType' object has no attribute 'find'
Page: 40 done
Page: 41 done
Page: 42 done
Page: 43 done
Page: 44 done
Page: 45 done
Page: 46 done
Page: 47 done
Page: 48 done
Page: 49 done
Page: 50 done
Page: 51 done
Page: 52 done
Page: 53 done
Page: 54 done
Page: 55 done
Error processing house on page 56: 'NoneType' object has no attribute 'find'
Page: 56

In [80]:
land_for_sale = create_dataset(land_str, sale_str)
land_for_sale_df = pd.DataFrame(land_for_sale)
land_for_sale_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/land_for_sale.csv', index=False)

Page: 1 done
Page: 2 done
Page: 3 done
Page: 4 done
Page: 5 done
Page: 6 done
Page: 7 done
Page: 8 done
Page: 9 done
Page: 10 done
Page: 11 done
Page: 12 done
Page: 13 done
Page: 14 done
Page: 15 done
Page: 16 done
Page: 17 done
Page: 18 done
Page: 19 done
Page: 20 done
Page: 21 done
Page: 22 done
Page: 23 done
Page: 24 done
Page: 25 done
Page: 26 done
Page: 27 done
Page: 28 done
Page: 29 done
Page: 30 done
Page: 31 done
Page: 32 done
Page: 33 done
Page: 34 done
Page: 35 done
Page: 36 done
Page: 37 done
Page: 38 done
Page: 39 done
Page: 40 done
Page: 41 done
Page: 42 done
Page: 43 done
Page: 44 done
Page: 45 done
Page: 46 done
Page: 47 done
Page: 48 done
Page: 49 done
Page: 50 done
Page: 51 done
Page: 52 done
Page: 53 done
Error processing house on page 54: 'NoneType' object has no attribute 'find'
Page: 54 done
Error processing house on page 55: 'NoneType' object has no attribute 'find'
Page: 55 done
Page: 56 done
Page: 57 done
Page: 58 done
Page: 59 done
Page: 60 done
Page: 61 done
P

In [82]:
houses_for_rent = create_dataset(houses_str, rent_str)
houses_for_rent_df = pd.DataFrame(houses_for_rent)
houses_for_rent_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/houses_for_rent.csv', index=False)

Page: 1 done
Page: 2 done
Page: 3 done
Page: 4 done
Page: 5 done
Page: 6 done
Page: 7 done
Page: 8 done
Page: 9 done
Page: 10 done
Page: 11 done
Page: 12 done
Page: 13 done
Page: 14 done
Page: 15 done
Page: 16 done
Page: 17 done
Page: 18 done
Page: 19 done
Page: 20 done
Page: 21 done
Page: 22 done
Page: 23 done
Page: 24 done
Page: 25 done
Page: 26 done
Page: 27 done
Page: 28 done
Page: 29 done
Page: 30 done
Page: 31 done
Page: 32 done
Page: 33 done
Page: 34 done
Error processing house on page 35: 'NoneType' object has no attribute 'find'
Page: 35 done
Page: 36 done
Page: 37 done
Page: 38 done
Error processing house on page 39: 'NoneType' object has no attribute 'find'
Page: 39 done
Page: 40 done
Page: 41 done
Page: 42 done
Page: 43 done
Page: 44 done
Page: 45 done
Page: 46 done
Page: 47 done
Page: 48 done
Page: 49 done
Page: 50 done
Page: 51 done
Page: 52 done
Page: 53 done
Page: 54 done
Page: 55 done
Page: 56 done
Page: 57 done
Page: 58 done
Page: 59 done
Page: 60 done
Page: 61 done
P

In [83]:
apartments_for_rent = create_dataset(apartments_str, rent_str)
apartments_for_rent_df = pd.DataFrame(apartments_for_rent)
apartments_for_rent_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/apartments_for_rent.csv', index=False)

Page: 1 done
Page: 2 done
Page: 3 done
Page: 4 done
Page: 5 done
Page: 6 done
Page: 7 done
Page: 8 done
Page: 9 done
Page: 10 done
Page: 11 done
Page: 12 done
Page: 13 done
Page: 14 done
Page: 15 done
Page: 16 done
Page: 17 done
Page: 18 done
Page: 19 done
Page: 20 done
Page: 21 done
Page: 22 done
Page: 23 done
Page: 24 done
Page: 25 done
Page: 26 done
Page: 27 done
Page: 28 done
Page: 29 done
Page: 30 done
Page: 31 done
Page: 32 done
Page: 33 done
Error processing house on page 34: 'NoneType' object has no attribute 'find'
Page: 34 done
Page: 35 done
Page: 36 done
Page: 37 done
Page: 38 done
Page: 39 done
Page: 40 done
Page: 41 done
Error processing house on page 42: 'NoneType' object has no attribute 'find'
Page: 42 done
Page: 43 done
Page: 44 done
Page: 45 done
Page: 46 done
Page: 47 done
Page: 48 done
Page: 49 done
Page: 50 done
Page: 51 done
Page: 52 done
Page: 53 done
Page: 54 done
Page: 55 done
Page: 56 done
Page: 57 done
Page: 58 done
Page: 59 done
Page: 60 done
Page: 61 done
P

In [88]:
commercial_prop_for_rent = create_dataset(commercial_property_str, rent_str)
commercial_prop_for_rent_df = pd.DataFrame(commercial_prop_for_rent)
commercial_prop_for_rent_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/commercial_prop_for_rent.csv', index=False)

In [89]:
bedsitter_for_rent = create_dataset(bedsitter_str, rent_str)
bedsitter_for_rent_df = pd.DataFrame(bedsitter_for_rent)
bedsitter_for_rent_df.to_csv('/content/drive/My Drive/Colab Notebooks/data/bedsitter_for_rent.csv', index=False)

Page: 1 done


In [93]:
len(bedsitter_for_rent)

11