In [1]:
import requests
from bs4 import BeautifulSoup
import csv

In [1]:
import pandas as pd

# Column layout shared by the rent and sale exports. Selecting both frames with
# the same list keeps the column order of the merged file stable.
export_columns = [
    'Link', 'Title', 'Price', 'Property ID', 'Area', 'Floor', 'Bedrooms',
    'Bathrooms', 'Description', 'Latitude', 'Longitude', 'Features',
    'Location', 'Status', 'Garages', 'Type',
]

# Load the two scraped datasets.
df_rent = pd.read_csv('property_choice_website_data_for_rent_with_link.csv')
df_sale = pd.read_csv('property_choice_web_data_for_sale_with_link.csv')

# Stack the rent rows on top of the sale rows.
df_combined = pd.concat([df_rent[export_columns], df_sale[export_columns]])

# Persist the merged table; the index is not written to the file.
df_combined.to_csv('property_choice_website_data_with_link.csv', index=False)


In [2]:
# !pip install selenium

In [3]:
# !pip install requests-html

## This code is for getting the links of the properties which are for rent.

In [4]:
import requests
from bs4 import BeautifulSoup
import csv

base_url = 'https://www.pchoicebd.com/property-status/for-rent/'
property_links = []

# Iterate through pages 1 to 21. The page count is hard-coded; update the range
# if the site's pagination changes.
for page_number in range(1, 22):
    # The first page is served at the base URL; later pages use page/<n>/.
    if page_number == 1:
        url = base_url
    else:
        url = f"{base_url}page/{page_number}/"

    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Do not parse an error page as if it were a listing page.
        print(f"Skipping {url}: HTTP {response.status_code}")
        continue
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all article elements
    articles = soup.find_all('article')

    # Extract the links from the articles: the anchor with class "btn-default"
    # is taken as the link to the property page.
    for article in articles:
        link = article.find('a', class_='btn-default')
        # Skip articles without such an anchor, or whose anchor has no href.
        if link and link.get('href'):
            property_links.append(link['href'])

# Save the links to a CSV file: one header row, then one link per row.
with open('property_choice_web_links_rent.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['property_links'])
    csv_writer.writerows([link] for link in property_links)

print("Property links saved to property_choice_web_links_rent.csv")


Property links saved to property_choice_web_links_rent.csv


## This code is for getting the links of the properties which are for sale.

In [5]:
import requests
from bs4 import BeautifulSoup
import csv

url = 'https://www.pchoicebd.com/property-status/for-sale/'

# NOTE(review): only this single listing page is fetched; confirm that the
# for-sale listing has no further pages.
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)

property_links = []

if response.ok:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all article elements
    articles = soup.find_all('article')

    # Extract the links from the articles: the anchor with class "btn-default"
    # is taken as the link to the property page.
    for article in articles:
        link = article.find('a', class_='btn-default')
        # Skip articles without such an anchor, or whose anchor has no href.
        if link and link.get('href'):
            property_links.append(link['href'])
else:
    # Do not parse an error page as if it were a listing page.
    print(f"Could not fetch {url}: HTTP {response.status_code}")

# Save the links to a CSV file: one header row, then one link per row.
with open('property_choice_web_links_sale.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['property_links'])
    csv_writer.writerows([link] for link in property_links)


## The code below collects the links of the properties for rent again and also counts how many properties are actually listed: the home page says 1000+, but only 200+ are found. Perhaps the others have already been deleted.

In [9]:
import requests
from bs4 import BeautifulSoup
import csv

base_url = 'https://www.pchoicebd.com/property-status/for-rent/'
property_links = []

# Iterate through pages 1 to 21. The page count is hard-coded; update the range
# if the site's pagination changes.
for page_number in range(1, 22):
    # The first page is served at the base URL; later pages use page/<n>/.
    if page_number == 1:
        url = base_url
    else:
        url = f"{base_url}page/{page_number}/"

    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Do not count or parse an error page as if it were a listing page.
        print(f"Page {page_number}: skipped (HTTP {response.status_code})")
        continue
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all article elements
    articles = soup.find_all('article')
    print(f"Page {page_number}: {len(articles)} articles found")  # Print the number of articles found on each page

    # Extract the links from the articles: the anchor with class "btn-default"
    # is taken as the link to the property page.
    for article in articles:
        link = article.find('a', class_='btn-default')
        # Skip articles without such an anchor, or whose anchor has no href.
        if link and link.get('href'):
            property_links.append(link['href'])

# Save the links to a CSV file: one header row, then one link per row.
with open('property_choice_web_links_rent2.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['property_links'])
    csv_writer.writerows([link] for link in property_links)

print("Property links saved to property_choice_web_links_rent2.csv")


Page 1: 47 articles found
Page 2: 34 articles found
Page 3: 20 articles found
Page 4: 18 articles found
Page 5: 7 articles found
Page 6: 8 articles found
Page 7: 8 articles found
Page 8: 9 articles found
Page 9: 7 articles found
Page 10: 2 articles found
Page 11: 9 articles found
Page 12: 2 articles found
Page 13: 8 articles found
Page 14: 8 articles found
Page 15: 6 articles found
Page 16: 3 articles found
Page 17: 5 articles found
Page 18: 8 articles found
Page 19: 13 articles found
Page 20: 6 articles found
Page 21: 14 articles found
Property links saved to property_choice_web_links_rent2.csv


## (for rent properties) This code is for getting the data and including the link of the property.

In [7]:
import requests
from bs4 import BeautifulSoup
import csv

def find_text_or_none(soup, tag, class_=None, text=None):
    """Return the stripped text of the first matching tag, or None.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find`` method.
        tag: Tag name to look for (e.g. ``"span"``).
        class_: Optional class filter, passed through to ``find``.
        text: Optional string filter, forwarded to ``find``. When it is None
            or empty, no string filter is applied.

    Returns:
        The matched tag's ``.text`` with surrounding whitespace removed, or
        None when ``find`` returns nothing.
    """
    if text:
        # ``string=`` is the current name of bs4's deprecated ``text=`` filter.
        found_tag = soup.find(tag, class_=class_, string=text)
    else:
        found_tag = soup.find(tag, class_=class_)
    if found_tag:
        return found_tag.text.strip()
    return None

def _meta_value(soup, label, **value_filters):
    """Return the text of the "meta-item-value" span that follows a label.

    Looks up the "meta-item-label" span whose string equals ``label`` and then
    the next "meta-item-value" span after it. Extra keyword arguments are
    forwarded to ``find_next``. Returns None when either span is missing.
    """
    label_tag = soup.find("span", class_="meta-item-label", string=label)
    if label_tag is None:
        return None
    value_tag = label_tag.find_next("span", class_="meta-item-value", **value_filters)
    return value_tag.text if value_tag is not None else None


# Load the rent links written by the link-collection cell; the first row is
# the header.
property_links = []
with open('property_choice_web_links_rent.csv', 'r', newline='', encoding='utf-8') as csvfile:
    csv_reader = csv.reader(csvfile)
    next(csv_reader, None)  # Skip header (tolerates an empty file)
    for row in csv_reader:
        if row:  # csv.reader yields [] for blank lines
            property_links.append(row[0])

property_data = []

for url in property_links:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    property_title = find_text_or_none(soup, "h1", class_="entry-title single-property-title")
    price = find_text_or_none(soup, "span", class_="single-property-price price")
    # NOTE(review): this takes the first "meta-item-value" span on the page,
    # presumably the property ID; confirm against the page markup.
    property_id = find_text_or_none(soup, "span", class_="meta-item-value")
    area = _meta_value(soup, "Area")
    # Floor is the only field that additionally filters the value span with string=True.
    floor = _meta_value(soup, "Floor", string=True)
    bedrooms = _meta_value(soup, "Bedrooms")
    bathrooms = _meta_value(soup, "Bathrooms")
    location = _meta_value(soup, "Location")
    status = _meta_value(soup, "Status")
    garages = _meta_value(soup, "Garages")
    property_type = _meta_value(soup, "Type")
    # An empty description is stored as None, like a missing one.
    description = find_text_or_none(soup, "div", class_="property-content") or None

    latitude = "latitude_placeholder"  # Replace with code to extract latitude
    longitude = "longitude_placeholder" # Replace with code to extract longitude

    features_list = soup.find("ul", class_="property-features-list clearfix")
    features = [li.text for li in features_list.find_all("li")] if features_list else []

    # Add the link to the property data
    property_data.append([url, property_title, price, property_id, area, floor, bedrooms, bathrooms, description, latitude, longitude, features, location, status, garages, property_type])

# Write one header row followed by one row per scraped property.
with open('property_choice_website_data_for_rent_with_link.csv', mode='w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Link', 'Title', 'Price', 'Property ID', 'Area', 'Floor', 'Bedrooms', 'Bathrooms', 'Description', 'Latitude', 'Longitude', 'Features', 'Location', 'Status', 'Garages', 'Type'])
    writer.writerows(property_data)

print("Data saved to property_choice_website_data_for_rent_with_link.csv")

Data saved to property_choice_website_data_for_rent_with_link.csv


## (for sale properties) This code is for getting the data and including the link of the property.

In [8]:
import requests
from bs4 import BeautifulSoup
import csv

def find_text_or_none(soup, tag, class_=None, text=None):
    """Return the stripped text of the first matching tag, or None.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find`` method.
        tag: Tag name to look for (e.g. ``"span"``).
        class_: Optional class filter, passed through to ``find``.
        text: Optional string filter, forwarded to ``find``. When it is None
            or empty, no string filter is applied.

    Returns:
        The matched tag's ``.text`` with surrounding whitespace removed, or
        None when ``find`` returns nothing.
    """
    if text:
        # ``string=`` is the current name of bs4's deprecated ``text=`` filter.
        found_tag = soup.find(tag, class_=class_, string=text)
    else:
        found_tag = soup.find(tag, class_=class_)
    if found_tag:
        return found_tag.text.strip()
    return None

def _meta_value(soup, label, **value_filters):
    """Return the text of the "meta-item-value" span that follows a label.

    Looks up the "meta-item-label" span whose string equals ``label`` and then
    the next "meta-item-value" span after it. Extra keyword arguments are
    forwarded to ``find_next``. Returns None when either span is missing.
    """
    label_tag = soup.find("span", class_="meta-item-label", string=label)
    if label_tag is None:
        return None
    value_tag = label_tag.find_next("span", class_="meta-item-value", **value_filters)
    return value_tag.text if value_tag is not None else None


# Load the sale links written by the link-collection cell; the first row is
# the header.
property_links = []
with open('property_choice_web_links_sale.csv', 'r', newline='', encoding='utf-8') as csvfile:
    csv_reader = csv.reader(csvfile)
    next(csv_reader, None)  # Skip header (tolerates an empty file)
    for row in csv_reader:
        if row:  # csv.reader yields [] for blank lines
            property_links.append(row[0])

property_data = []

for url in property_links:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    property_title = find_text_or_none(soup, "h1", class_="entry-title single-property-title")
    price = find_text_or_none(soup, "span", class_="single-property-price price")
    # NOTE(review): this takes the first "meta-item-value" span on the page,
    # presumably the property ID; confirm against the page markup.
    property_id = find_text_or_none(soup, "span", class_="meta-item-value")
    area = _meta_value(soup, "Area")
    # Floor is the only field that additionally filters the value span with string=True.
    floor = _meta_value(soup, "Floor", string=True)
    bedrooms = _meta_value(soup, "Bedrooms")
    bathrooms = _meta_value(soup, "Bathrooms")
    location = _meta_value(soup, "Location")
    status = _meta_value(soup, "Status")
    garages = _meta_value(soup, "Garages")
    property_type = _meta_value(soup, "Type")
    # An empty description is stored as None, like a missing one.
    description = find_text_or_none(soup, "div", class_="property-content") or None

    latitude = "latitude_placeholder"  # Replace with code to extract latitude
    longitude = "longitude_placeholder" # Replace with code to extract longitude

    features_list = soup.find("ul", class_="property-features-list clearfix")
    features = [li.text for li in features_list.find_all("li")] if features_list else []

    # Add the link to the property data
    property_data.append([url, property_title, price, property_id, area, floor, bedrooms, bathrooms, description, latitude, longitude, features, location, status, garages, property_type])

# Write one header row followed by one row per scraped property.
with open('property_choice_web_data_for_sale_with_link.csv', mode='w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Link', 'Title', 'Price', 'Property ID', 'Area', 'Floor', 'Bedrooms', 'Bathrooms', 'Description', 'Latitude', 'Longitude', 'Features', 'Location', 'Status', 'Garages', 'Type'])
    writer.writerows(property_data)

print("Data saved to property_choice_web_data_for_sale_with_link.csv")

Data saved to property_choice_web_data_for_sale_with_link.csv
