**Scrapping Nigeria Rent and Sale Propety from PropertPro** 

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import os

# Function to extract property details
def extract_property_details(soup):
    properties = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='property-listing-grid')
    
    for listing in listings:
        try:
            # Extract price
            price = listing.find('div', class_='pl-price').find('h3').text.strip()
        except AttributeError:
            price = 'N/A'
        
        try:
            # Extract title
            title = listing.find('div', class_='pl-title').text.strip()
        except AttributeError:
            title = 'N/A'

        try:
            # Extract property ID
            pid = listing.find('p').text.strip().replace("PID :", "")
        except AttributeError:
            pid = 'N/A'

        try:
            # Extract number of beds and baths
            details = listing.find('h6').text.strip()
        except AttributeError:
            details = 'N/A'
        
        properties.append({
            'Title': title,
            'Price': price,
            'PID': pid,
            'Details': details
        })
        
    return properties

# URL template (with page number placeholder)
url_template = 'https://www.propertypro.ng/property-for-sale?page={}'

# Number of pages to crawl (adjust based on the total pages)
num_pages = 719

# File to save progress
output_file = 'propertypro_sale_listings.csv'
last_page_file = 'last_page.txt'

# List to store all scraped properties
all_properties = []

# Check if there's already a last saved page to resume from
if os.path.exists(last_page_file):
    with open(last_page_file, 'r') as f:
        last_page = int(f.read().strip()) + 1
else:
    last_page = 1

# Crawl through multiple pages, starting from the last saved page
for page_num in range(last_page, num_pages + 1):
    print(f"Scraping page {page_num}...")
    url = url_template.format(page_num)
    
    try:
        # Send a GET request to fetch the HTML content
        response = requests.get(url, timeout=10)  # Add timeout to handle slow responses
        
        if response.status_code == 200:
            # Parse the HTML content using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Extract property details from the page
            properties = extract_property_details(soup)
            
            # Add the scraped data to the list
            all_properties.extend(properties)
            
            # Save progress every 20 pages
            if page_num % 20 == 0:
                df = pd.DataFrame(all_properties)
                
                if os.path.exists(output_file):
                    # Append to the existing file
                    df.to_csv(output_file, mode='a', header=False, index=False)
                else:
                    # Save as a new file
                    df.to_csv(output_file, index=False)
                
                all_properties = []  # Clear list after saving
                print(f"Saved progress at page {page_num}.")
            
            # Update last scraped page
            with open(last_page_file, 'w') as f:
                f.write(str(page_num))
            
            # Delay to avoid overloading the server
            time.sleep(2)
        else:
            print(f"Failed to fetch page {page_num}, status code: {response.status_code}")
    
    except requests.exceptions.RequestException as e:
        print(f"Error on page {page_num}: {e}")
        # Retry fetching the page after a short delay
        time.sleep(5)
        continue  # Skip to the next page

# Final save of any remaining data
if all_properties:
    df = pd.DataFrame(all_properties)
    df.to_csv(output_file, mode='a', header=False, index=False)
    print(f"Final data saved to '{output_file}'.")

print("Scraping complete.")

In [None]:
   # Function to extract property details for rent listings
def extract_rental_details(soup):
    properties = []
    
    # Find all property listings for rent
    listings = soup.find_all('div', class_='property-listing-grid')
    
    for listing in listings:
        try:
            # Extract price
            price = listing.find('div', class_='pl-price').find('h3').text.strip()
        except AttributeError:
            price = 'N/A'
        
        try:
            # Extract title
            title = listing.find('div', class_='pl-title').text.strip()
        except AttributeError:
            title = 'N/A'

        try:
            # Extract property ID
            pid = listing.find('p').text.strip().replace("PID :", "")
        except AttributeError:
            pid = 'N/A'

        try:
            # Extract number of beds and baths
            details = listing.find('h6').text.strip()
        except AttributeError:
            details = 'N/A'
        
        properties.append({
            'Title': title,
            'Price': price,
            'PID': pid,
            'Details': details
        })
        
    return properties

# URL template for houses for rent (with page number placeholder)
url_template = 'https://www.propertypro.ng/property-for-rent?page={}'

# Number of pages to crawl (you can adjust this)
num_pages = 293

# List to store all scraped properties
all_rental_properties = []

# Crawl through multiple pages for rent listings
for page_num in range(1, num_pages + 1):
    print(f"Scraping page {page_num} for rent listings...")
    url = url_template.format(page_num)
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        rental_properties = extract_rental_details(soup)
        all_rental_properties.extend(rental_properties)
        time.sleep(2)
    else:
        print(f"Failed to fetch page {page_num}, status code: {response.status_code}")

# Convert the data into a DataFrame
df_rentals = pd.DataFrame(all_rental_properties)

# Save the data into a CSV file
df_rentals.to_csv('propertypro_rent_listings.csv', index=False)
print("Data saved to 'propertypro_rent_listings.csv'")

**Scrapping Rent and Sale Properties for Kenya**   

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import json
import os

# Function to extract property details
def extract_property_details(soup):
    properties = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='sc_panelWrapper')
    
    for listing in listings:
        try:
            # Extract price
            price = listing.find('div', class_='p24_price').text.strip()
        except AttributeError:
            price = 'N/A'
        
        try:
            # Extract title
            title = listing.find('div', class_='p24_regularTile').text.strip()
        except AttributeError:
            title = 'N/A'

        try:
            # Extract property details (e.g., bedrooms, bathrooms)
            details = listing.find('span', class_='js_listingTileImageHolder').text.strip()
        except AttributeError:
            details = 'N/A'
        
        properties.append({
            'Title': title,
            'Price': price,
            'Details': details
        })
        
    return properties

# Base URL template (with placeholders for province name and ID)
base_url = 'https://www.property24.co.ke/property-for-sale-in-{}-p{}?Page={}'

# List of provinces with their associated IDs
provinces = {
    'mombasa': 93,
    'kwale': 85,
    'kilifi': 80,
    'tana river': 105,
    'lamu': 87,
    'taita–taveta': 104,
    'garissa': 73,
    'wajir': 111,
    'mandera': 89,
    'marsabit': 90,
    'isiolo': 75,
    'meru': 91,
    'tharaka-nithi': 106,
    'embu': 72,
    'kitui': 84,
    'machakos': 66,
    'makueni': 88,
    'nyandarua': 100,
    'nyeri': 101,
    'kirinyaga': 81,
    'muranga': 94,
    'kiambu': 79,
    'turkana': 108,
    'west pokot': 112,
    'samburu': 102,
    'trans-nzoia': 107,
    'uasin gishu': 109,
    'elgeyo-marakwet': 71,
    'nandi': 97,
    'baringo': 67,
    'laikipia': 86,
    'nakuru': 96,
    'narok': 98,
    'kajiado': 76,
    'kericho': 78,
    'bomet': 68,
    'kakamega': 77,
    'vihiga': 110,
    'bungoma': 69,
    'busia': 70,
    'siaya': 103,
    'kisumu': 83,
    'homa bay': 74,
    'migori': 92,
    'kisii': 82,
    'nyamira': 99,
    'nairobi': 95
    # Add more provinces and their IDs here...
}

# File to save progress and track last page scraped for each province
progress_file = 'scraping_progress.json'

# Load progress if it exists
if os.path.exists(progress_file):
    with open(progress_file, 'r') as file:
        progress = json.load(file)
else:
    progress = {province: 1 for province in provinces}  # Start from page 1 for all provinces

# Placeholder for all scraped properties
all_properties = []

# Function to scrape a specific province
def scrape_province(province, province_id):
    page_num = progress.get(province, 1)  # Start from the last saved page
    while True:
        print(f"Scraping {province}, Page {page_num}...")
        
        # Construct the URL with the province and page number
        url = base_url.format(province, province_id, page_num)
        
        # Request the page
        response = requests.get(url)
        
        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to fetch {url}, status code: {response.status_code}")
            break
        
        # Parse the content
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract property details
        properties = extract_property_details(soup)
        
        if not properties:
            print(f"No more listings found for {province}. Stopping at page {page_num}.")
            break
        
        # Add properties to the global list
        all_properties.extend(properties)
        
        # Save progress after each page
        progress[province] = page_num
        with open(progress_file, 'w') as file:
            json.dump(progress, file)
        
        # Save data incrementally to avoid data loss
        pd.DataFrame(all_properties).to_csv('property24_kenya_listings.csv', index=False)
        
        # Check if a 'next' page link exists
        next_button = soup.find('li', class_='pagelink')
        if not next_button:
            print(f"Finished scraping {province} after {page_num} pages.")
            break
        
        # Delay between requests to avoid overloading the server
        time.sleep(2)
        
        # Increment page number
        page_num += 1

# Loop through all provinces
for province, province_id in provinces.items():
    scrape_province(province, province_id)

print("Scraping complete. Data saved to 'property24_kenya_listings.csv'.")

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import json
import os

# Function to extract property details
def extract_property_details(soup):
    properties = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='sc_panelWrapper')
    
    for listing in listings:
        try:
            # Extract price
            price = listing.find('span', class_='p24_price').text.strip()
        except AttributeError:
            price = 'N/A'
        
        try:
            # Extract title
            title = listing.find('span', class_='p24_propertyTitle').text.strip()
        except AttributeError:
            title = 'N/A'

        try:
            # Extract property details (e.g., location)
            details = listing.find('span', class_='p24_location').text.strip()
        except AttributeError:
            details = 'N/A'
        
        properties.append({
            'Title': title,
            'Price': price,
            'Details': details
        })
        
    return properties

# Base URL template (with placeholders for province name and ID)
base_url = 'https://www.property24.co.ke/property-to-rent-in-{}-p{}?Page={}'

# List of provinces with their associated IDs
provinces = {
    'mombasa': 93,
    'kwale': 85,
    'kilifi': 80,
    'tana river': 105,
    'lamu': 87,
    'taita–taveta': 104,
    'garissa': 73,
    'wajir': 111,
    'mandera': 89,
    'marsabit': 90,
    'isiolo': 75,
    'meru': 91,
    'tharaka-nithi': 106,
    'embu': 72,
    'kitui': 84,
    'machakos': 66,
    'makueni': 88,
    'nyandarua': 100,
    'nyeri': 101,
    'kirinyaga': 81,
    'muranga': 94,
    'kiambu': 79,
    'turkana': 108,
    'west pokot': 112,
    'samburu': 102,
    'trans-nzoia': 107,
    'uasin gishu': 109,
    'elgeyo-marakwet': 71,
    'nandi': 97,
    'baringo': 67,
    'laikipia': 86,
    'nakuru': 96,
    'narok': 98,
    'kajiado': 76,
    'kericho': 78,
    'bomet': 68,
    'kakamega': 77,
    'vihiga': 110,
    'bungoma': 69,
    'busia': 70,
    'siaya': 103,
    'kisumu': 83,
    'homa bay': 74,
    'migori': 92,
    'kisii': 82,
    'nyamira': 99,
    'nairobi': 95, # Limiting Nairobi to 1000 pages later in the loop
}

# File to save progress and track last page scraped for each province
progress_file = 'scraping_progress_rent.json'

# Load progress if it exists
if os.path.exists(progress_file):
    with open(progress_file, 'r') as file:
        progress = json.load(file)
else:
    progress = {province: 1 for province in provinces}  # Start from page 1 for all provinces

# Placeholder for all scraped properties
all_properties = []

# Function to scrape a specific province
def scrape_province(province, province_id):
    page_num = progress.get(province, 1)  # Start from the last saved page
    max_pages = 1000 if province == 'nairobi' else 9999  # Limit to 1000 pages for Nairobi
    
    while page_num <= max_pages:
        print(f"Scraping {province}, Page {page_num}...")
        
        # Construct the URL with the province and page number
        url = base_url.format(province, province_id, page_num)
        
        # Request the page
        response = requests.get(url)
        
        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to fetch {url}, status code: {response.status_code}")
            break
        
        # Parse the content
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract property details
        properties = extract_property_details(soup)
        
        if not properties:
            print(f"No more listings found for {province}. Stopping at page {page_num}.")
            break
        
        # Add properties to the global list
        all_properties.extend(properties)
        
        # Save progress after each page
        progress[province] = page_num
        with open(progress_file, 'w') as file:
            json.dump(progress, file)
        
        # Save data incrementally to avoid data loss
        pd.DataFrame(all_properties).to_csv('property24_kenya_rent_listings.csv', index=False)
        
        # Check if a 'next' page link exists
        next_button = soup.find('li', class_='pagelink')
        if not next_button:
            print(f"Finished scraping {province} after {page_num} pages.")
            break
        
        # Delay between requests to avoid overloading the server
        time.sleep(2)
        
        # Increment page number
        page_num += 1

# Loop through all provinces
for province, province_id in provinces.items():
    scrape_province(province, province_id)

print("Scraping complete. Data saved to 'property24_kenya_rent_listings.csv'.")

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import json
import os

# Function to extract property details
def extract_property_details(soup):
    properties = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='sc_panelWrapper')
    
    for listing in listings:
        try:
            # Extract price
            price = listing.find('span', class_='p24_price').text.strip()
        except AttributeError:
            price = 'N/A'
        
        try:
            # Extract title
            title = listing.find('span', class_='p24_propertyTitle').text.strip()
        except AttributeError:
            title = 'N/A'

        try:
            # Extract property details (e.g., location)
            details = listing.find('span', class_='p24_location').text.strip()
        except AttributeError:
            details = 'N/A'
        
        properties.append({
            'Title': title,
            'Price': price,
            'Details': details
        })
        
    return properties

# Base URL template (with placeholders for province name and ID)
base_url = 'https://www.property24.co.ke/property-for-sale-in-{}-p{}?Page={}'

# List of provinces with their associated IDs
provinces = {
    'mombasa': 93,
    'nairobi': 95, # Limiting Nairobi to 1000 pages later in the loop
}

# File to save progress and track last page scraped for each province
progress_file = 'scraping_progress_sale.json'

# Load progress if it exists
if os.path.exists(progress_file):
    with open(progress_file, 'r') as file:
        progress = json.load(file)
else:
    progress = {province: 1 for province in provinces}  # Start from page 1 for all provinces

# Placeholder for all scraped properties
all_properties = []

# Function to scrape a specific province
def scrape_province(province, province_id):
    page_num = progress.get(province, 1)  # Start from the last saved page
    max_pages = 1000 if province == 'nairobi' else 9999  # Limit to 1000 pages for Nairobi
    
    while page_num <= max_pages:
        print(f"Scraping {province}, Page {page_num}...")
        
        # Construct the URL with the province and page number
        url = base_url.format(province, province_id, page_num)
        
        # Request the page
        response = requests.get(url)
        
        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to fetch {url}, status code: {response.status_code}")
            break
        
        # Parse the content
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract property details
        properties = extract_property_details(soup)
        
        if not properties:
            print(f"No more listings found for {province}. Stopping at page {page_num}.")
            break
        
        # Add properties to the global list
        all_properties.extend(properties)
        
        # Save progress after each page
        progress[province] = page_num
        with open(progress_file, 'w') as file:
            json.dump(progress, file)
        
        # Save data incrementally to avoid data loss
        pd.DataFrame(all_properties).to_csv('property24_kenya_sale_listings.csv', index=False)
        
        # Check if a 'next' page link exists
        next_button = soup.find('li', class_='pagelink')
        if not next_button:
            print(f"Finished scraping {province} after {page_num} pages.")
            break
        
        # Delay between requests to avoid overloading the server
        time.sleep(2)
        
        # Increment page number
        page_num += 1

# Loop through all provinces
for province, province_id in provinces.items():
    scrape_province(province, province_id)

print("Scraping complete. Data saved to 'property24_kenya_sale_listings.csv'.")

DR CONGO 

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import os

# Function to extract property details
def extract_property_details(soup):
    properties = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='ligne_bien')
    
    for listing in listings:
        try:
            # Extract price (nested inside <font> within <div class="price">)
            price = listing.find('div', class_='price').find('font').text.strip()
        except AttributeError:
            price = 'N/A'
        
        try:
            # Extract title (from <a> tag)
            anchor = listing.find('a')
            title = anchor['title'].strip()
            url = anchor['href']
        except AttributeError:
            title = 'N/A'
            url = 'N/A'

        try:
            # Extract room details (from <font> tag with room info)
            rooms = listing.find_all('font', style="vertical-align: inherit;")[1].text.strip()
        except (AttributeError, IndexError):
            rooms = 'N/A'

        properties.append({
            'Title': title,
            'Price': price,
            'Rooms': rooms,
            'URL': url
        })
        
    return properties

# URL template with pagination
url_template = 'https://www.imcongo.com/congo-a-louer-tri--mode-list-recherche--congo-2-immo-en.html?page={}'

# Number of pages to crawl (adjust this based on how many pages are available)
num_pages = 5  # Adjust this number based on actual pagination

# List to store all scraped properties
all_properties = []

# File to save the progress incrementally
output_file = 'imcongo_rent_listings.csv'

# Check if the file already exists, so we don't overwrite data during scraping
if os.path.exists(output_file):
    existing_df = pd.read_csv(output_file)
    all_properties = existing_df.to_dict('records')  # Load existing data into list
else:
    existing_df = pd.DataFrame()  # Create empty DataFrame if file doesn't exist

# Crawl through multiple pages
for page_num in range(1, num_pages + 1):
    print(f"Scraping page {page_num}...")
    
    # Generate the URL for the current page
    url = url_template.format(page_num)
    
    # Send a GET request to fetch the HTML content
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract property details from the page
        properties = extract_property_details(soup)
        
        # If no properties found, break (assumes no listings means end of pages)
        if not properties:
            print(f"No more listings found on page {page_num}. Stopping.")
            break
        
        # Add the scraped data to the list
        all_properties.extend(properties)
        
        # Save progress after each page
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)  # Save to CSV after scraping each page
        
        # Delay to avoid overloading the server
        time.sleep(2)  # Sleep for 2 seconds between requests
    else:
        print(f"Failed to fetch page {page_num}, status code: {response.status_code}")
        break

print(f"Scraping complete. Data saved to '{output_file}'")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping complete. Data saved to 'imcongo_rent_listings.csv'


EGYPT

In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to extract property details
def extract_property_details(soup):
    properties = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='propertyItem')
    
    for listing in listings:
        try:
            # Extract title (from <a> tag with class 'propertyImgLink')
            title_tag = listing.find('a', class_='propertyImgLink')
            title = title_tag['title'].strip() if title_tag else 'N/A'
        except AttributeError:
            title = 'N/A'
        
        try:
            # Extract price (from <span> tag with class 'price')
            price_tag = listing.find('span', class_='price')
            price = price_tag.text.strip() if price_tag else 'N/A'
        except AttributeError:
            price = 'N/A'

        try:
            # Extract bedroom info (from the <td> containing "Beds" text)
            bedroom_td = listing.find('td', text=lambda x: x and 'Beds' in x)
            bedroom_info = bedroom_td.text.strip() if bedroom_td else 'N/A'
        except AttributeError:
            bedroom_info = 'N/A'
        
        properties.append({
            'Title': title,
            'Price': price,
            'Bedrooms': bedroom_info
        })
        
    return properties

# URL template with pagination
url_template = 'http://www.theegyptrealestate.com/properties/all/date_desc/grid/{}'

# Number of pages to crawl (adjust based on the actual last page)
num_pages = 221  # Change this if needed

# List to store all scraped properties
all_properties = []

# File to save the progress incrementally
output_file = 'egypt_rent_listings.csv'

# Crawl through multiple pages
for page_num in range(1, num_pages + 1):
    print(f"Scraping page {page_num}...")
    
    # Generate the URL for the current page
    url = url_template.format(page_num)
    
    # Send a GET request to fetch the HTML content
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract property details from the page
        properties = extract_property_details(soup)
        
        # If no properties found, break (assumes no listings means end of pages)
        if not properties:
            print(f"No more listings found on page {page_num}. Stopping.")
            break
        
        # Add the scraped data to the list
        all_properties.extend(properties)
        
        # Save progress after each page
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)  # Save to CSV after scraping each page
        
        # Delay to avoid overloading the server
        time.sleep(2)  # Sleep for 2 seconds between requests
    else:
        print(f"Failed to fetch page {page_num}, status code: {response.status_code}")
        break

print(f"Scraping complete. Data saved to '{output_file}'")

Scraping page 1...


  bedroom_td = listing.find('td', text=lambda x: x and 'Beds' in x)


Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...


KeyboardInterrupt: 

In [15]:
import requests
from bs4 import BeautifulSoup

# Function to extract bedroom information
def extract_bedroom_info(soup):
    bedrooms = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='propertyItem')
    
    for listing in listings:
        try:
            # Find the <td> that contains the bed icon and extract the number of beds
            bed_td = listing.find('td', text=False)
            bed_img = bed_td.find('img', alt=True)
            
            if bed_img and 'Beds' in bed_img['alt']:
                # Extract the number of beds from the alt attribute
                bed_info = bed_img['alt'].strip()
            else:
                bed_info = 'N/A'
        except AttributeError:
            bed_info = 'N/A'
        
        bedrooms.append(bed_info)
    
    return bedrooms

# URL to scrape (example)
url = 'http://www.theegyptrealestate.com/properties/all/date_desc/grid/1'

# Send a GET request to fetch the HTML content
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Extract bedroom information
    bedroom_info = extract_bedroom_info(soup)
    
    # Print the extracted bedroom info
    print(bedroom_info)
else:
    print(f"Failed to fetch the page, status code: {response.status_code}")

['N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A']


  bed_td = listing.find('td', text=False)


In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to extract property details
def extract_property_details(soup):
    properties = []
    
    # Find all property listings
    listings = soup.find_all('div', class_='propertyItem')
    
    for listing in listings:
        try:
            # Extract title (from <a> tag with class 'propertyImgLink')
            title_tag = listing.find('a', class_='propertyImgLink')
            title = title_tag['title'].strip() if title_tag else 'N/A'
        except AttributeError:
            title = 'N/A'
        
        try:
            # Extract price (from <span> tag with class 'price')
            price_tag = listing.find('span', class_='price')
            price = price_tag.text.strip() if price_tag else 'N/A'
        except AttributeError:
            price = 'N/A'

        try:
            # Extract bedroom info from the <td> containing the <img> tag with alt attribute
            bedroom_info_tag = listing.find('td').find('img', alt=True)
            bedroom_info = bedroom_info_tag['alt'].strip() if bedroom_info_tag else 'N/A'
        except AttributeError:
            bedroom_info = 'N/A'
        
        properties.append({
            'Title': title,
            'Price': price,
            'Bedrooms': bedroom_info
        })
        
    return properties

# URL template with pagination
url_template = 'http://www.theegyptrealestate.com/properties/all/date_desc/grid/{}'

# Number of pages to crawl (adjust based on the actual last page)
num_pages = 221  # Change this if needed

# List to store all scraped properties
all_properties = []

# File to save the progress incrementally
output_file = 'egypt_rent_listings.csv'

# Crawl through multiple pages
for page_num in range(1, num_pages + 1):
    print(f"Scraping page {page_num}...")
    
    # Generate the URL for the current page
    url = url_template.format(page_num)
    
    # Send a GET request to fetch the HTML content
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract property details from the page
        properties = extract_property_details(soup)
        
        # If no properties found, break (assumes no listings means end of pages)
        if not properties:
            print(f"No more listings found on page {page_num}. Stopping.")
            break
        
        # Add the scraped data to the list
        all_properties.extend(properties)
        
        # Save progress after each page
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)  # Save to CSV after scraping each page
        
        # Delay to avoid overloading the server
        time.sleep(2)  # Sleep for 2 seconds between requests
    else:
        print(f"Failed to fetch page {page_num}, status code: {response.status_code}")
        break

print(f"Scraping complete. Data saved to '{output_file}'")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...


KeyboardInterrupt: 