In [2]:
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import pandas as pd
import nest_asyncio
import time
import os
import math

# Patch asyncio so that asyncio.run() further down can be called even when an
# event loop is already running (presumably a Jupyter kernel, given the
# notebook-style cell marker at the top of this file — confirm if run elsewhere).
nest_asyncio.apply()

def _stripped_text(element):
    """Return the whitespace-stripped text of ``element``, or None if it is missing."""
    return element.text.strip() if element is not None else None


def _parse_listing(section):
    """Extract the fields of one listing card into a dict keyed by output column name.

    Every lookup tolerates missing markup: a field whose element cannot be
    found is reported as None instead of raising.
    """
    title_link = section.find('a', {'data-cy': 'listing-title-link'})
    title_span = title_link.find('span') if title_link is not None else None

    description_headings = section.select('h3:nth-of-type(2)')
    description = _stripped_text(description_headings[0]) if description_headings else None

    # The location is the second <p> under the card's first <div>; both the
    # <div> and the second <p> may be absent, so check before indexing.
    first_div = section.find('div')
    location_paragraphs = (
        first_div.select('div:nth-of-type(2) p') if first_div is not None else []
    )
    location = (
        _stripped_text(location_paragraphs[1]) if len(location_paragraphs) > 1 else None
    )

    category_div = section.find('div', attrs={'data-bi-listing-category': True})
    category = category_div.get('data-bi-listing-category') if category_div is not None else None

    # The agency slug is the last path component of the logo link's href.
    agency_link = section.find("a", {"data-cy": "agency-logo"})
    agency_href = agency_link.get('href') if agency_link is not None else None
    agency = agency_href.split('/')[-1] if agency_href is not None else None

    # NOTE: find() searches descendants only, so this is the wire:id of the
    # first nested <div> carrying one, not the card's own attribute.
    wire_div = section.find('div', {'wire:id': True})
    wire_id = wire_div['wire:id'] if wire_div is not None else None

    return {
        'Id': wire_id,
        'Name': _stripped_text(title_span),
        'Price': _stripped_text(section.find("div", {"data-cy": "card-price"})),
        'Description': description,
        'Location': location,
        'Listing Category': category,
        'Bedrooms': _stripped_text(section.find("span", {"data-cy": "card-beds"})),
        'Bathrooms': _stripped_text(section.find("span", {"data-cy": "card-baths"})),
        'Status Badge': _stripped_text(section.find("div", {"data-cy": "status-badge"})),
        'Agency': agency,
    }


async def scrape_page(session, url):
    """Fetch one search-results page and return its listings as a DataFrame.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the GET request.
        url: Absolute URL of the results page to scrape.

    Returns:
        A ``pandas.DataFrame`` with one row per listing card and the columns
        Id, Name, Price, Description, Location, Listing Category, Bedrooms,
        Bathrooms, Status Badge and Agency. An empty DataFrame is returned when
        the request does not succeed or the page has no main content container.
    """
    async with session.get(url) as response:
        # An error page would otherwise be parsed as if it were a listing page.
        if response.status != 200:
            print(f"Request failed with status {response.status} for URL: {url}")
            return pd.DataFrame()
        content = await response.text()

    soup = BeautifulSoup(content, 'html.parser')
    main_section = soup.select_one('#mainContent .container')

    if main_section is None:
        print(f"No main section found for URL: {url}")
        return pd.DataFrame()

    # Listing cards are the <div>s that carry both a wire:id and a data-cy attribute.
    section_columns = main_section.find_all('div', attrs={'wire:id': True, 'data-cy': True})
    return pd.DataFrame([_parse_listing(section) for section in section_columns])

async def scrape_property_data(start_page, end_page):
    """Scrape a contiguous range of result pages and combine them into one DataFrame.

    Pages are fetched sequentially through a single HTTP session, with a
    5-second pause between consecutive requests.

    Args:
        start_page: First page number to scrape (page 1 is the bare base URL).
        end_page: Last page number to scrape, inclusive.

    Returns:
        A ``pandas.DataFrame`` holding the rows of every scraped page, or an
        empty DataFrame when the range contains no pages.
    """
    base_url = 'https://www.buyrentkenya.com/property-for-rent'
    headers = {
        # HTTP header names use hyphens; with an underscore this would be sent
        # as an unrelated custom header and the default client UA would be used.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.5'
    }
    async with aiohttp.ClientSession(headers=headers) as session:
        results = []
        for page in range(start_page, end_page + 1):
            url = base_url if page == 1 else f'{base_url}?page={page}'
            df = await scrape_page(session, url)
            results.append(df)
            # Pause between requests only; there is nothing to wait for after
            # the final page of the range.
            if page < end_page:
                await asyncio.sleep(5)

    # pd.concat raises ValueError on an empty list (start_page > end_page).
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)


async def main(start_page, end_page):
    """Scrape every result page from ``start_page`` to the site's last page.

    The first request reads the total listing count from the search page and
    converts it to a page count. Pages are then scraped in consecutive batches
    whose size is that of the initial ``start_page``..``end_page`` range, with
    a 5-second pause between batches. The final batch is clamped so no page
    past the last one is requested.

    Args:
        start_page: First page number to scrape.
        end_page: Last page number of the first batch, inclusive; also fixes
            the batch size as ``end_page - start_page + 1``.

    Returns:
        A ``pandas.DataFrame`` with the listings of all scraped pages, or an
        empty DataFrame when ``start_page`` lies beyond the last page.

    Raises:
        ValueError: If ``end_page < start_page``, or if the listing count
            cannot be located or parsed on the search page.
    """
    if end_page < start_page:
        raise ValueError(
            f"end_page ({end_page}) must not be smaller than start_page ({start_page})"
        )

    base_url = 'https://www.buyrentkenya.com/property-for-rent'
    # Same request headers as the page scraper, so the count comes from the
    # same variant of the site that is scraped afterwards.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.5'
    }
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(base_url) as response:
            content = await response.text()

    soup = BeautifulSoup(content, 'html.parser')
    count_container = soup.find('div', {'data-cy': 'search-result-count'})
    count_span = count_container.find('span') if count_container is not None else None
    if count_span is None:
        raise ValueError(f"Could not find the search result count on {base_url}")
    try:
        # The count is the second-to-last word of the span's text; drop any
        # thousands separators before converting.
        max_listings = int(count_span.text.split()[-2].replace(',', ''))
    except (IndexError, ValueError) as err:
        raise ValueError(
            f"Could not parse the search result count from {count_span.text!r}"
        ) from err

    # Listings shown per results page; value carried over from the original
    # iteration formula — TODO confirm against the live site.
    listings_per_page = 17
    total_pages = math.ceil(max_listings / listings_per_page)
    batch_size = end_page - start_page + 1

    results = []
    batch_start = start_page
    while batch_start <= total_pages:
        batch_end = min(batch_start + batch_size - 1, total_pages)
        results.append(await scrape_property_data(batch_start, batch_end))
        batch_start = batch_end + 1
        if batch_start <= total_pages:
            await asyncio.sleep(5)  # Delay between batches, not after the last one

    if results:
        main_df = pd.concat(results, ignore_index=True)
    else:
        main_df = pd.DataFrame()  # Nothing to scrape

    return main_df


# Run the full scrape (first batch: pages 1 through 5) and time it end to end.
start_time = time.time()
df = asyncio.run(main(1, 5))
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Program executed in {elapsed_time} seconds.")

# Destination of the CSV export: target folder (an empty string leaves the
# path as the bare filename) and the file's name inside it.
folder_path, filename = r'', 'property_data.csv'
file_path = os.path.join(folder_path, filename)

# Write the scraped listings to disk without the DataFrame index column.
df.to_csv(file_path, index=False)

# Leave the DataFrame as the final expression so the notebook renders it.
df


Program executed in 3145.51912522316 seconds.


Unnamed: 0,Id,Name,Price,Description,Location,Listing Category,Bedrooms,Bathrooms,Status Badge,Agency
0,xkhVbO8pt2hy3CV0VDql,4 Bed House with En Suite at Lavington,"KSh 200,000",Lavington : 4 Bedroom All En suite Townhouse i...,Lavington,Other Houses,,,Gold,
1,skboYouLuewoKAlUVcs1,Furnished 1 Bed Apartment with En Suite in Kil...,"KSh 80,000",Spacious 1 Bedroom Furnished Apartment,Kilimani,Other Apartments,,,Gold,
2,bgSaps44nJe1BLg8SWbr,4 Bed House with En Suite at Off Limuru Rd,"KSh 140,000",Executive 4 bedroom House All en-suite + dsq F...,"02, Off limuru Rd, Redhill",Other Houses,,,Silver,
3,LMRHMwhChcIYORwpMfIT,Serviced 3 Bed Apartment with En Suite at Kili...,"KSh 230,000",Kilimani : 3 Bedroom New and Modern Furnished ...,Kilimani,Other Apartments,,,Silver,
4,0RzdTIlE1U5sM9p7ZvGD,4 Bed House with En Suite at Off Kiambu Road,"KSh 160,000",Executive 4 Bedroom House Master Bedroom en-su...,"01, Off Kiambu Road, Kiambu Road",Other Houses,,,Silver,
...,...,...,...,...,...,...,...,...,...,...
5762,xIzPwB7yp600uY0vJRb5,2 Bed Apartment with En Suite at Kilimani,"KSh 75,000",Brand new apt 2bd master ensuite lift Borehole...,"kilimani, Kilimani",Other Apartments,,,Silver,
5763,MHD7E31wqpG3w8qXfhsI,3 Bed House with En Suite in Rosslyn,"KSh 420,000",Luxury 3 Bedroom Bungalow to let Rosslyn,"Rosslyn, Westlands",Other Houses,,,Silver,
5764,jgdQRceQ7i9kHPTyjQ36,4 Bed House with En Suite at Runda Paradise,"KSh 250,000",Executive 4 Bedroom House all en-suite + dsq F...,"00, Runda Paradise, Runda, Westlands",Other Houses,,,Silver,
5765,UVWyhUqgDhF0Yl9HVFne,3 Bed Apartment with En Suite in Kileleshwa,"KSh 150,000",Modern Elegance! Kileleshwa 3 bedroom Unfurnis...,Kileleshwa,Other Apartments,,,Silver,
