In [None]:
pip install -r requirements.txt

## Short Let

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import concurrent.futures


# Short-let result pages to scrape: page numbers 0 through 66, requested with
# sort=beds and order=desc.
url = [
    f'https://www.propertypro.ng/property-for-short-let?sort=beds&order=desc&page={page_number:d}'
    for page_number in range(67)
]

# Column accumulators filled by extract_data(): every scraped listing adds
# exactly one entry to each list, so equal indexes describe the same listing.
titles, types, locations = [], [], []
prices, date_posted, PIDs = [], [], []
furnished, beds, agents = [], [], []


import threading

# Serialises writes to the shared result lists so the nine fields of one
# listing always land at the same index, even with many extract_data() calls
# running on worker threads at once.
_RESULTS_LOCK = threading.Lock()


def _node_text(node, default):
    """Return ``node.text``, or ``default`` when the lookup found no node."""
    return default if node is None else node.text


def _parse_listing(house):
    """Pull the nine raw fields out of one listing card.

    Args:
        house: The BeautifulSoup element of a single listing.

    Returns:
        tuple: (title, type, location, price, date, PID, furnish, bed, agent).
        A field that is missing from the card is replaced by its placeholder
        string.
    """
    title = _node_text(house.find('h3', class_="listings-property-title2"), 'No title')
    category = _node_text(house.find('h4', class_="listings-property-title"), 'No type')

    # The location is read from the second <h4> of the card. Cards with fewer
    # than two <h4> elements get the placeholder instead of an IndexError.
    headings = house.find_all('h4')
    location = headings[1].text if len(headings) > 1 else 'No location'

    price = _node_text(house.find('h3', class_="listings-price"), 'No price')
    date = _node_text(house.find('h5'), 'No date')

    pid_node = house.find('h2')
    pid = 'No PID' if pid_node is None else pid_node.text.replace('PID:', '')

    # Furnished / serviced / newly built labels; '0' is stored when the card
    # has no such element.
    furnish = _node_text(house.find('div', class_="furnished-btn"), '0')

    # Utilities text (beds, baths, toilets), flattened onto one line.
    bed_node = house.find('div', class_="fur-areea")
    bed = 'No beds' if bed_node is None else bed_node.text.replace('\n', ' ').strip()

    # The agent is taken from the link inside the "elite-icon" box; a missing
    # box, link or href all fall back to the placeholder.
    agent_box = house.find('div', class_="elite-icon")
    link = None if agent_box is None else agent_box.a
    href = None if link is None else link.get('href')
    agent = href.replace('/agent/', '') if href else 'No agent'

    return (title, category, location, price, date, pid, furnish, bed, agent)


def extract_data(url):
    """Scrape one results page and add its listings to the module-level lists.

    Args:
        url: Address of a single results page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.

    Side effects:
        Appends one entry per listing to each of ``titles``, ``types``,
        ``locations``, ``prices``, ``date_posted``, ``PIDs``, ``furnished``,
        ``beds`` and ``agents``. All fields of a page are appended while
        holding a lock, so the lists stay index-aligned across threads.
    """
    print('began')
    # A timeout keeps a stalled connection from blocking a worker forever.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")
    rows = [
        _parse_listing(house)
        for house in soup.find_all('div', class_="single-room-sale listings-property")
    ]

    columns = (titles, types, locations, prices, date_posted, PIDs, furnished, beds, agents)
    with _RESULTS_LOCK:
        for row in rows:
            for column, value in zip(columns, row):
                column.append(value)


def transform_data():
    """Build the cleaned short-let table from the scraped lists and save it.

    Reads the module-level lists ``titles``, ``types``, ``locations``,
    ``agents``, ``prices``, ``date_posted``, ``PIDs``, ``furnished`` and
    ``beds``, derives typed columns from their raw text, and writes the result
    to ``propertypro_short_let.csv``.

    Price handling: the first run of digits in the price text (after removing
    the currency sign and thousands separators) is the amount. The text is
    checked for 'day', then 'month', then 'year' to pick the billing period;
    when none is present the amount is treated as a daily price. The three
    ``price_per_*_₦`` columns are float columns using 30 days per month,
    365 days per year and 12 months per year.

    Returns:
        pandas.DataFrame: The cleaned table that was written to the CSV file.
    """
    df = pd.DataFrame({'title': titles,
                       'categories': types,
                       'address': locations,
                       'agent': agents,
                       'price': prices,
                       'date_post': date_posted,
                       'PIDs': PIDs,
                       'furnish': furnished,
                       'bed': beds})

    # Boolean flags taken from the label text of each listing.
    df['newly_built'] = df['furnish'].str.contains('Newly Built', regex=False)
    df['serviced'] = df['furnish'].str.contains('Serviced', regex=False)
    df['furnished'] = df['furnish'].str.contains('Furnished', regex=False)
    df = df.drop(columns='furnish')

    # Room counts; anything that cannot be parsed becomes 0.
    room_columns = ['beds', 'baths', 'toilets']
    df[room_columns] = df['bed'].str.extract(r'(\d+)\s*beds?\s*(\d*)\s*baths?\s*(\d*)\s*Toilets?')
    for column in room_columns:
        df[column] = pd.to_numeric(df[column], errors='coerce').fillna(0).astype(int)

    df['price'] = df['price'].str.replace('₦', '', regex=False)
    amount = pd.to_numeric(
        df['price'].str.replace(',', '', regex=False).str.extract(r'(\d+)')[0]
    ).astype('float64')

    # Billing period with the precedence day > month > year; rows matching
    # none of them are handled as daily prices.
    is_day = df['price'].str.contains('day', regex=False)
    is_month = df['price'].str.contains('month', regex=False) & ~is_day
    is_year = df['price'].str.contains('year', regex=False) & ~is_day & ~is_month

    # Start from the daily interpretation and overwrite the monthly / yearly rows.
    df['price_per_day_₦'] = amount.mask(is_month, amount / 30).mask(is_year, amount / 365)
    df['price_per_month_₦'] = (amount * 30).mask(is_month, amount).mask(is_year, amount / 12)
    df['price_per_year_₦'] = (amount * 365).mask(is_month, amount * 12).mask(is_year, amount)

    df = df.drop(columns='bed')

    # "Added <date>" / "Updated <date>" fragments; missing ones become NaT.
    df['date_posted'] = df['date_post'].str.extract(r'Added (\d{2} \w{3} \d{4})', expand=False)
    df['date_updated'] = df['date_post'].str.extract(r'Updated (\d{2} \w{3} \d{4})', expand=False)
    df['date_posted'] = pd.to_datetime(df['date_posted'], format='%d %b %Y')
    df['date_updated'] = pd.to_datetime(df['date_updated'], format='%d %b %Y')
    df = df.drop(columns='date_post')

    # The state is the last whitespace-separated word of the address.
    df['state'] = df['address'].str.split().str[-1]
    df.to_csv('propertypro_short_let.csv', index=False)
    return df

# Scrape every page on a pool of worker threads. Each future is inspected
# because executor.map() only re-raises a worker's exception when its results
# are iterated; a bare map() call would hide every failed page.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    pending = {executor.submit(extract_data, page_url): page_url for page_url in url}
    for future in concurrent.futures.as_completed(pending):
        error = future.exception()
        if error is not None:
            # Report the failed page and keep going with the remaining ones.
            print(f'failed: {pending[future]} ({error!r})')

transform_data()

## For Sale

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import concurrent.futures


# For-sale result pages to scrape: page numbers 0 through 864, requested with
# sort=postedOn and order=desc.
url = [
    f'https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page={page_number:d}'
    for page_number in range(865)
]

# Column accumulators filled by extract_data(): every scraped listing adds
# exactly one entry to each list, so equal indexes describe the same listing.
titles, types, locations = [], [], []
prices, date_posted, PIDs = [], [], []
furnished, beds, agents = [], [], []


import threading

# Serialises writes to the shared result lists so the nine fields of one
# listing always land at the same index, even with many extract_data() calls
# running on worker threads at once.
_RESULTS_LOCK = threading.Lock()


def _node_text(node, default):
    """Return ``node.text``, or ``default`` when the lookup found no node."""
    return default if node is None else node.text


def _parse_listing(house):
    """Pull the nine raw fields out of one listing card.

    Args:
        house: The BeautifulSoup element of a single listing.

    Returns:
        tuple: (title, type, location, price, date, PID, furnish, bed, agent).
        A field that is missing from the card is replaced by its placeholder
        string.
    """
    title = _node_text(house.find('h3', class_="listings-property-title2"), 'No title')
    category = _node_text(house.find('h4', class_="listings-property-title"), 'No type')

    # The location is read from the second <h4> of the card. Cards with fewer
    # than two <h4> elements get the placeholder instead of an IndexError.
    headings = house.find_all('h4')
    location = headings[1].text if len(headings) > 1 else 'No location'

    price = _node_text(house.find('h3', class_="listings-price"), 'No price')
    date = _node_text(house.find('h5'), 'No date')

    pid_node = house.find('h2')
    pid = 'No PID' if pid_node is None else pid_node.text.replace('PID:', '')

    # Furnished / serviced / newly built labels; '0' is stored when the card
    # has no such element.
    furnish = _node_text(house.find('div', class_="furnished-btn"), '0')

    # Utilities text (beds, baths, toilets), flattened onto one line.
    bed_node = house.find('div', class_="fur-areea")
    bed = 'No beds' if bed_node is None else bed_node.text.replace('\n', ' ').strip()

    # The agent is taken from the link inside the "elite-icon" box; a missing
    # box, link or href all fall back to the placeholder.
    agent_box = house.find('div', class_="elite-icon")
    link = None if agent_box is None else agent_box.a
    href = None if link is None else link.get('href')
    agent = href.replace('/agent/', '') if href else 'No agent'

    return (title, category, location, price, date, pid, furnish, bed, agent)


def extract_data(url):
    """Scrape one results page and add its listings to the module-level lists.

    Args:
        url: Address of a single results page; it is printed as a progress
            marker before the request is made.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.

    Side effects:
        Appends one entry per listing to each of ``titles``, ``types``,
        ``locations``, ``prices``, ``date_posted``, ``PIDs``, ``furnished``,
        ``beds`` and ``agents``. All fields of a page are appended while
        holding a lock, so the lists stay index-aligned across threads.
    """
    print('began')
    print(url)
    # A timeout keeps a stalled connection from blocking a worker forever.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")
    rows = [
        _parse_listing(house)
        for house in soup.find_all('div', class_="single-room-sale listings-property")
    ]

    columns = (titles, types, locations, prices, date_posted, PIDs, furnished, beds, agents)
    with _RESULTS_LOCK:
        for row in rows:
            for column, value in zip(columns, row):
                column.append(value)


def transform_data():
    """Build the cleaned for-sale table from the scraped lists and save it.

    Reads the module-level lists ``titles``, ``types``, ``locations``,
    ``agents``, ``prices``, ``date_posted``, ``PIDs``, ``furnished`` and
    ``beds``, derives typed columns from their raw text, and writes the result
    to ``propertypro_for_sale.csv``.

    Returns:
        pandas.DataFrame: The cleaned table that was written to the CSV file.
    """
    df = pd.DataFrame({'title': titles,
                       'categories': types,
                       'address': locations,
                       'agent': agents,
                       'price': prices,
                       'date_post': date_posted,
                       'PIDs': PIDs,
                       'furnish': furnished,
                       'bed': beds})

    # Boolean flags taken from the label text of each listing.
    df['newly_built'] = df['furnish'].str.contains('Newly Built', regex=False)
    df['serviced'] = df['furnish'].str.contains('Serviced', regex=False)
    df['furnished'] = df['furnish'].str.contains('Furnished', regex=False)
    df = df.drop(columns='furnish')

    # Room counts; anything that cannot be parsed becomes 0.
    room_columns = ['beds', 'baths', 'toilets']
    df[room_columns] = df['bed'].str.extract(r'(\d+)\s*beds?\s*(\d*)\s*baths?\s*(\d*)\s*Toilets?')
    for column in room_columns:
        df[column] = pd.to_numeric(df[column], errors='coerce').fillna(0).astype(int)

    # The price is the first run of digits once the currency sign and the
    # thousands separators are removed; text without digits becomes NaN.
    price_text = df['price'].str.replace('₦', '', regex=False)
    df['price_₦'] = pd.to_numeric(
        price_text.str.replace(',', '', regex=False).str.extract(r'(\d+)')[0]
    )

    df = df.drop(columns=['price', 'bed'])

    # "Added <date>" / "Updated <date>" fragments of the raw date text.
    df['date_posted'] = df['date_post'].str.extract(r'Added (\d{2} \w{3} \d{4})', expand=False)
    df['date_updated'] = df['date_post'].str.extract(r'Updated (\d{2} \w{3} \d{4})', expand=False)
    df['date_posted'] = pd.to_datetime(df['date_posted'], format='%d %b %Y')
    df['date_updated'] = pd.to_datetime(df['date_updated'], format='%d %b %Y')
    # Listings without an "Updated" date carry a text marker instead of NaT.
    df['date_updated'] = df['date_updated'].fillna("not updated")
    df = df.drop(columns='date_post')

    # The state is the last whitespace-separated word of the address.
    df['state'] = df['address'].str.split().str[-1]
    df.to_csv('propertypro_for_sale.csv', index=False)
    return df
    

# Scrape every page on a pool of worker threads. Each future is inspected
# because executor.map() only re-raises a worker's exception when its results
# are iterated; a bare map() call would hide every failed page.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    pending = {executor.submit(extract_data, page_url): page_url for page_url in url}
    for future in concurrent.futures.as_completed(pending):
        error = future.exception()
        if error is not None:
            # Report the failed page and keep going with the remaining ones.
            print(f'failed: {pending[future]} ({error!r})')

transform_data()


began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=0
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=1
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=2
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=3
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=4
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=5
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=6
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=7
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=8
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=9
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=10
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=11
be

## For Rent

In [21]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import concurrent.futures

# For-rent result pages to scrape: page numbers 0 through 423, requested with
# sort=postedOn and order=desc.
url = [
    f'https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page={page_number:d}'
    for page_number in range(424)
]

# Column accumulators filled by extract_data(): every scraped listing adds
# exactly one entry to each list, so equal indexes describe the same listing.
titles, types, locations = [], [], []
prices, date_posted, PIDs = [], [], []
furnished, beds, agents = [], [], []


import threading

# Serialises writes to the shared result lists so the nine fields of one
# listing always land at the same index, even with many extract_data() calls
# running on worker threads at once.
_RESULTS_LOCK = threading.Lock()


def _node_text(node, default):
    """Return ``node.text``, or ``default`` when the lookup found no node."""
    return default if node is None else node.text


def _parse_listing(house):
    """Pull the nine raw fields out of one listing card.

    Args:
        house: The BeautifulSoup element of a single listing.

    Returns:
        tuple: (title, type, location, price, date, PID, furnish, bed, agent).
        A field that is missing from the card is replaced by its placeholder
        string.
    """
    title = _node_text(house.find('h3', class_="listings-property-title2"), 'No title')
    category = _node_text(house.find('h4', class_="listings-property-title"), 'No type')

    # The location is read from the second <h4> of the card. Cards with fewer
    # than two <h4> elements get the placeholder instead of an IndexError.
    headings = house.find_all('h4')
    location = headings[1].text if len(headings) > 1 else 'No location'

    price = _node_text(house.find('h3', class_="listings-price"), 'No price')
    date = _node_text(house.find('h5'), 'No date')

    pid_node = house.find('h2')
    pid = 'No PID' if pid_node is None else pid_node.text.replace('PID:', '')

    # Furnished / serviced / newly built labels; '0' is stored when the card
    # has no such element.
    furnish = _node_text(house.find('div', class_="furnished-btn"), '0')

    # Utilities text (beds, baths, toilets), flattened onto one line.
    bed_node = house.find('div', class_="fur-areea")
    bed = 'No beds' if bed_node is None else bed_node.text.replace('\n', ' ').strip()

    # The agent is taken from the link inside the "elite-icon" box; a missing
    # box, link or href all fall back to the placeholder.
    agent_box = house.find('div', class_="elite-icon")
    link = None if agent_box is None else agent_box.a
    href = None if link is None else link.get('href')
    agent = href.replace('/agent/', '') if href else 'No agent'

    return (title, category, location, price, date, pid, furnish, bed, agent)


def extract_data(url):
    """Scrape one results page and add its listings to the module-level lists.

    Args:
        url: Address of a single results page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.

    Side effects:
        Appends one entry per listing to each of ``titles``, ``types``,
        ``locations``, ``prices``, ``date_posted``, ``PIDs``, ``furnished``,
        ``beds`` and ``agents``. All fields of a page are appended while
        holding a lock, so the lists stay index-aligned across threads.
    """
    print('began')
    # A timeout keeps a stalled connection from blocking a worker forever.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")
    rows = [
        _parse_listing(house)
        for house in soup.find_all('div', class_="single-room-sale listings-property")
    ]

    columns = (titles, types, locations, prices, date_posted, PIDs, furnished, beds, agents)
    with _RESULTS_LOCK:
        for row in rows:
            for column, value in zip(columns, row):
                column.append(value)


def transform_data():
    """Assemble the for-rent table from the scraped lists, save it, return it.

    The module-level lists ``titles``, ``types``, ``locations``, ``agents``,
    ``prices``, ``date_posted``, ``PIDs``, ``furnished`` and ``beds`` supply
    the raw columns. Label flags, room counts, a numeric price, parsed dates
    and the state are derived from them, and the result is written to
    ``propertypro_for_rent.csv``.

    Returns:
        pandas.DataFrame: The table that was written to the CSV file.
    """
    listings = pd.DataFrame({
        'title': titles,
        'categories': types,
        'address': locations,
        'agent': agents,
        'price': prices,
        'date_post': date_posted,
        'PIDs': PIDs,
        'furnish': furnished,
        'bed': beds,
    })

    # One boolean column per label that may appear in the furnish text.
    label_columns = (
        ('newly_built', 'Newly Built'),
        ('serviced', 'Serviced'),
        ('furnished', 'Furnished'),
    )
    for column_name, label in label_columns:
        listings[column_name] = listings['furnish'].apply(lambda text, label=label: label in text)
    listings = listings.drop(columns='furnish')

    # Room counts; values that fail to parse are stored as 0.
    room_columns = ['beds', 'baths', 'toilets']
    listings[room_columns] = listings['bed'].str.extract(
        r'(\d+)\s*beds?\s*(\d*)\s*baths?\s*(\d*)\s*Toilets?'
    )
    for column_name in room_columns:
        parsed = pd.to_numeric(listings[column_name], errors='coerce')
        listings[column_name] = parsed.fillna(0).astype(int)

    # First run of digits of the price text without currency sign and commas.
    price_text = listings['price'].str.replace('₦', '')
    first_digits = price_text.str.replace(',', '').str.extract(r'(\d+)')[0]
    listings['price_₦_yearly'] = pd.to_numeric(first_digits)
    listings = listings.drop(columns=['price', 'bed'])

    # Dates come from the "Added ..." / "Updated ..." parts of the raw text;
    # a missing update date is replaced by the text "not updated".
    raw_dates = listings['date_post']
    added = raw_dates.str.extract(r'Added (\d{2} \w{3} \d{4})', expand=False)
    updated = raw_dates.str.extract(r'Updated (\d{2} \w{3} \d{4})', expand=False)
    listings['date_posted'] = pd.to_datetime(added, format='%d %b %Y')
    listings['date_updated'] = pd.to_datetime(updated, format='%d %b %Y').fillna("not updated")
    listings = listings.drop(columns='date_post')

    # The state is the last whitespace-separated word of the address.
    listings['state'] = listings['address'].str.split().str[-1]

    listings.to_csv('propertypro_for_rent.csv', index=False)
    return listings
    

# Scrape every page on a pool of worker threads. Each future is inspected
# because executor.map() only re-raises a worker's exception when its results
# are iterated; a bare map() call would hide every failed page.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    pending = {executor.submit(extract_data, page_url): page_url for page_url in url}
    for future in concurrent.futures.as_completed(pending):
        error = future.exception()
        if error is not None:
            # Report the failed page and keep going with the remaining ones.
            print(f'failed: {pending[future]} ({error!r})')

transform_data()


https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=0
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=1
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=2
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=3
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=4
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=5
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=6
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=7
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=8
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=9
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=10
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=11
began
ht

Unnamed: 0,title,categories,address,agent,PIDs,newly_built,serviced,furnished,beds,baths,toilets,price_₦_yearly,date_posted,date_updated,state
0,A Luxuriously Built 2 Bedroom Apartment,2 BEDROOM HOUSE FOR RENT,Ilasan Elegushi Lekki Ilasan Lekki Lagos,chukwuemeka-felix,6JXDL,True,True,False,2,2,3,3200000,2023-08-02,not updated,Lagos
1,Newly Built Mini Flat In A Serene Environment,1 BEDROOM FLAT / APARTMENT FOR RENT,Ifako Ogba Ogba Lagos,city-garden-real-estate-ltd,5JXDL,False,False,False,1,0,0,500000,2023-08-02,not updated,Lagos
2,Sharp Two Bedroom Apartment In An Estate,2 BEDROOM FLAT / APARTMENT FOR RENT,Peacevile Estate Badore Badore Ajah Lagos,156,4JXDL,False,False,False,2,0,0,1500000,2023-08-02,not updated,Lagos
3,Newly Built 2 Bedroom Apartment,2 BEDROOM FLAT / APARTMENT FOR RENT,Ifako Ogba Ogba Lagos,city-garden-real-estate-ltd,3JXDL,True,False,False,2,0,0,1000000,2023-08-02,not updated,Lagos
4,Beautiful Luxury Serviced 1 Bedroom Apartment,1 BEDROOM HOUSE FOR RENT,Lekki Phase 1 Lekki Lagos,sanrealtor,2JXDL,True,True,False,1,1,2,4000000,2023-08-02,not updated,Lagos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21160,A Tastefully Finished 5bedroom Fully Detached ...,5 BEDROOM DETACHED DUPLEX HOUSE FOR RENT,Jabi District Jabi Abuja,austelright-co,4EMYX,False,False,False,5,5,6,12000000,2021-04-07,not updated,Abuja
21161,Newly Built Mini Flat In A Serene Environment,1 BEDROOM MINI FLAT MINI FLAT FLAT / APARTMENT...,Lerato Ahmadiyya Abule Egba Abule Egba Lagos,tobbylex-properties-consultant,4EDUC,True,False,True,1,1,1,250000,2021-01-04,not updated,Lagos
21162,Brand New 4bedroom Terrace Duplex,4 BEDROOM TERRACED DUPLEX HOUSE FOR RENT,2nd Toll Gate Lekki Lagos,gideon4,4DSGL,False,True,False,4,4,5,3000000,2020-08-10,not updated,Lagos
21163,Newly Built 2 Bedroom Flat,2 BEDROOM FLAT / APARTMENT FOR RENT,Ebute Metta Ebute Metta Yaba Lagos,stephen-ogweh,1DRGW,True,False,False,2,2,2,800000,2020-08-03,not updated,Lagos
