In [None]:
pip install -r requirements.txt

## Short Let

In [23]:
import datetime
# Previous calendar day according to the local clock; the bare name on the
# last line makes the notebook echo the value.
yesterday = datetime.date.today() - datetime.timedelta(days=1)
yesterday


datetime.date(2023, 8, 12)

In [26]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import concurrent.futures
import datetime

# Run dates: `yesterday` is a date object, `today` ends up as the string
# '_YYYY-MM-DD'.
yesterday = datetime.date.today() - datetime.timedelta(days=1)
today = f'_{datetime.date.today()}'

# One search-results URL per page number 0..199.
url = [f'https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page={i:d}' for i in range(200)]

# Column lists filled by extract_data() and combined into a DataFrame by
# transform_data().
titles, types, locations = [], [], []
prices, date_posted, PIDs = [], [], []
furnished, beds, agents = [], [], []


import threading

# extract_data() runs on many worker threads at once. The lock makes the nine
# appends for one listing a single unit, so index i of every column list
# always refers to the same listing.
_ROW_LOCK = threading.Lock()


def extract_data(url):
    """Scrape one results page and append its listings to the column lists.

    Every listing card on the page contributes exactly one entry to each of
    the module-level lists ``titles``, ``types``, ``locations``, ``prices``,
    ``date_posted``, ``PIDs``, ``furnished``, ``beds`` and ``agents``. A
    missing element is recorded with its placeholder ('No title', 'No type',
    'No location', 'No price', 'No date', 'No PID', '0', 'No beds',
    'No agent') rather than being skipped.

    Args:
        url: Address of a single search-results page.

    Returns:
        None. Results are delivered through the module-level lists.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or does not answer within 30 seconds.
    """
    print('began')
    print(url)
    # Without a timeout a stalled connection would block its worker forever.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, "html.parser")
    house_box = soup.find_all('div', class_="single-room-sale listings-property")

    def text_of(tag, default):
        """Return the tag's text, or ``default`` when the tag is absent."""
        return tag.text if tag is not None else default

    for house in house_box:
        title = text_of(house.find('h3', class_="listings-property-title2"), 'No title')
        category = text_of(house.find('h4', class_="listings-property-title"), 'No type')

        # The address is read from the second <h4>; a card with fewer
        # headings gets the placeholder instead of raising IndexError.
        headings = house.find_all('h4')
        location = headings[1].text if len(headings) > 1 else 'No location'

        price = text_of(house.find('h3', class_="listings-price"), 'No price')
        date = text_of(house.find('h5'), 'No date')

        pid_tag = house.find('h2')
        pid = pid_tag.text.replace('PID:', '') if pid_tag is not None else 'No PID'

        # furnished / serviced / newly built badges
        furnish = text_of(house.find('div', class_="furnished-btn"), '0')

        # bed / bath / toilet summary, flattened onto one line
        bed_tag = house.find('div', class_="fur-areea")
        bed = bed_tag.text.replace('\n', ' ').strip() if bed_tag is not None else 'No beds'

        # The agent slug comes from the link inside the badge; the link or
        # its href may be missing.
        agent = 'No agent'
        agent_box = house.find('div', class_="elite-icon")
        if agent_box is not None and agent_box.a is not None:
            href = agent_box.a.get('href')
            if href:
                agent = href.replace('/agent/', '')

        # Publish the whole listing in one step so concurrent workers cannot
        # interleave their fields across the lists.
        with _ROW_LOCK:
            titles.append(title)
            types.append(category)
            locations.append(location)
            prices.append(price)
            date_posted.append(date)
            PIDs.append(pid)
            furnished.append(furnish)
            beds.append(bed)
            agents.append(agent)


def transform_data():
    """Build the short-let table from the scraped lists and save yesterday's rows.

    Reads the module-level column lists, derives the amenity flags, the
    bed/bath/toilet counts, the numeric price, the posted/updated dates and
    the state, keeps only rows posted or updated on ``yesterday``, writes
    them to a CSV named after ``yesterday`` and returns the filtered frame.
    """
    listings = pd.DataFrame({
        'title': titles,
        'categories': types,
        'address': locations,
        'agent': agents,
        'price': prices,
        'date_post': date_posted,
        'PIDs': PIDs,
        'furnish': furnished,
        'bed': beds,
    })

    # Amenity flags: True when the badge text mentions the amenity.
    for flag, badge in (('newly_built', 'Newly Built'),
                        ('serviced', 'Serviced'),
                        ('furnished', 'Furnished')):
        listings[flag] = listings['furnish'].apply(lambda text, badge=badge: badge in text)
    listings = listings.drop(columns='furnish')

    # Room counts: unmatched or empty captures become 0.
    counts = listings['bed'].str.extract(r'(\d+)\s*beds?\s*(\d*)\s*baths?\s*(\d*)\s*Toilets?')
    for position, column in enumerate(('beds', 'baths', 'toilets')):
        listings[column] = pd.to_numeric(counts[position], errors='coerce').fillna(0).astype(int)

    # Price: drop the currency sign and thousands separators, keep the first
    # run of digits.
    without_sign = listings['price'].str.replace('₦', '')
    listings['price_₦_yearly'] = pd.to_numeric(without_sign.str.replace(',', '').str.extract(r'(\d+)')[0])
    listings = listings.drop(columns=['price', 'bed'])

    # Dates: unparseable or missing values become NaT.
    posted_text = listings['date_post'].str.extract(r'Added (\d{2} \w{3} \d{4})', expand=False)
    updated_text = listings['date_post'].str.extract(r'Updated (\d{2} \w{3} \d{4})', expand=False)
    listings['date_posted'] = pd.to_datetime(posted_text, format='%d %b %Y', errors='coerce')
    listings['date_updated'] = pd.to_datetime(updated_text, format='%d %b %Y', errors='coerce')
    listings = listings.drop(columns='date_post')

    # The last word of the address is taken as the state.
    listings['state'] = listings['address'].str.split().str[-1]

    posted_yesterday = listings['date_posted'].dt.date == yesterday
    updated_yesterday = listings['date_updated'].dt.date == yesterday
    listings = listings[posted_yesterday | updated_yesterday]

    listings.to_csv(f'../Real_Estate_data_pipeline/property_csv/propertypro_for_rent{yesterday}.csv', index=False)
    return listings

    

# Scrape every page on a pool of 20 threads. Each future is inspected so that
# a page that raised is reported instead of being dropped silently.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    pending = {executor.submit(extract_data, page_url): page_url for page_url in url}
    for finished in concurrent.futures.as_completed(pending):
        error = finished.exception()
        if error is not None:
            print(f'failed: {pending[finished]} -> {error!r}')

transform_data()


began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=0
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=1
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=2
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=3
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=4
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=5
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=6
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=7
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=8
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=9
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=10
began
https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page=11
be

Unnamed: 0,title,categories,address,agent,PIDs,newly_built,serviced,furnished,beds,baths,toilets,price_₦_yearly,date_posted,date_updated,state
23,3 Bedroom Flat,3 BEDROOM FLAT / APARTMENT FOR RENT,Abiola Housing Estate Ayobo Ipaja Lagos,purple-ribbon-properties,9JYKV,False,False,False,3,0,0,700000,2023-08-11,2023-08-12,Lagos
27,2 Bedroom Flat,2 BEDROOM FLAT / APARTMENT FOR RENT,Isokan Estate Ayobo Ipaja Lagos,purple-ribbon-properties,2JYKV,False,False,False,2,0,0,500000,2023-08-11,2023-08-12,Lagos
28,Standard Room And Parlor Selfcontain,1 BEDROOM FLAT / APARTMENT FOR RENT,Ita Olowu Ikorodu Lagos,purple-ribbon-properties,0JYKV,False,False,False,1,0,0,300000,2023-08-11,2023-08-12,Lagos
31,2 Bedroom Flat,2 BEDROOM FLAT / APARTMENT FOR RENT,Millenuim Ups Gbagada Lagos,purple-ribbon-properties,7JYKU,False,False,False,2,0,0,1700000,2023-08-11,2023-08-12,Lagos
32,8 Units Of 3 Bedroom Flat With One Room Bq,3 BEDROOM FLAT / APARTMENT FOR RENT,Lekki Lagos,purple-ribbon-properties,4JYKU,False,False,False,3,0,0,6000000,2023-08-11,2023-08-12,Lagos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9888,Newly Built 2 Bedroom Flat,2 BEDROOM FLAT / APARTMENT FOR RENT,Sangotedo Ajah Lagos,john-seyi,4JUUP,False,False,True,2,2,3,1100000,2023-07-17,2023-08-12,Lagos
9895,Newly Built 2 Bedroom Flat,2 BEDROOM FLAT / APARTMENT FOR RENT,Oke Ira Ogba Lagos,degeniusxpropertyworld,2JUUK,True,False,False,5,5,6,2000000,2023-07-17,2023-08-12,Lagos
9911,Water Front 3 Bedroom Apartment,3 BEDROOM FLAT / APARTMENT FOR RENT,Banana Island Ikoyi Lagos,rentalhub_island,3JUWW,True,True,False,5,6,6,18000000,2023-07-17,2023-08-12,Lagos
9977,4 Bedroom Apartments + Bq,4 BEDROOM HOUSE FOR RENT,Ikate Lekki Lagos,itj-realtor,7JUQW,True,True,True,4,5,5,4500000,2023-07-15,2023-08-12,Lagos


## For Sale

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import concurrent.futures


# One search-results URL per page number 0..864.
url = [f'https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page={i:d}' for i in range(865)]

# Column lists filled by extract_data() and combined into a DataFrame by
# transform_data().
titles, types, locations = [], [], []
prices, date_posted, PIDs = [], [], []
furnished, beds, agents = [], [], []


import threading

# extract_data() runs on many worker threads at once. The lock makes the nine
# appends for one listing a single unit, so index i of every column list
# always refers to the same listing.
_ROW_LOCK = threading.Lock()


def extract_data(url):
    """Scrape one for-sale results page and append its listings to the column lists.

    Every listing card on the page contributes exactly one entry to each of
    the module-level lists ``titles``, ``types``, ``locations``, ``prices``,
    ``date_posted``, ``PIDs``, ``furnished``, ``beds`` and ``agents``. A
    missing element is recorded with its placeholder ('No title', 'No type',
    'No location', 'No price', 'No date', 'No PID', '0', 'No beds',
    'No agent') rather than being skipped.

    Args:
        url: Address of a single search-results page.

    Returns:
        None. Results are delivered through the module-level lists.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or does not answer within 30 seconds.
    """
    print('began')
    print(url)
    # Without a timeout a stalled connection would block its worker forever.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, "html.parser")
    house_box = soup.find_all('div', class_="single-room-sale listings-property")

    def text_of(tag, default):
        """Return the tag's text, or ``default`` when the tag is absent."""
        return tag.text if tag is not None else default

    for house in house_box:
        title = text_of(house.find('h3', class_="listings-property-title2"), 'No title')
        category = text_of(house.find('h4', class_="listings-property-title"), 'No type')

        # The address is read from the second <h4>; a card with fewer
        # headings gets the placeholder instead of raising IndexError.
        headings = house.find_all('h4')
        location = headings[1].text if len(headings) > 1 else 'No location'

        price = text_of(house.find('h3', class_="listings-price"), 'No price')
        date = text_of(house.find('h5'), 'No date')

        pid_tag = house.find('h2')
        pid = pid_tag.text.replace('PID:', '') if pid_tag is not None else 'No PID'

        # furnished / serviced / newly built badges
        furnish = text_of(house.find('div', class_="furnished-btn"), '0')

        # bed / bath / toilet summary, flattened onto one line
        bed_tag = house.find('div', class_="fur-areea")
        bed = bed_tag.text.replace('\n', ' ').strip() if bed_tag is not None else 'No beds'

        # The agent slug comes from the link inside the badge; the link or
        # its href may be missing.
        agent = 'No agent'
        agent_box = house.find('div', class_="elite-icon")
        if agent_box is not None and agent_box.a is not None:
            href = agent_box.a.get('href')
            if href:
                agent = href.replace('/agent/', '')

        # Publish the whole listing in one step so concurrent workers cannot
        # interleave their fields across the lists.
        with _ROW_LOCK:
            titles.append(title)
            types.append(category)
            locations.append(location)
            prices.append(price)
            date_posted.append(date)
            PIDs.append(pid)
            furnished.append(furnish)
            beds.append(bed)
            agents.append(agent)


def transform_data():
    """Build the for-sale table from the scraped lists and save it as CSV.

    Reads the module-level column lists, derives the amenity flags, the
    bed/bath/toilet counts, the numeric price, the posted/updated dates and
    the state, then writes everything to 'propertypro_for_sale.csv'.
    Nothing is returned.
    """
    listings = pd.DataFrame({
        'title': titles,
        'categories': types,
        'address': locations,
        'agent': agents,
        'price': prices,
        'date_post': date_posted,
        'PIDs': PIDs,
        'furnish': furnished,
        'bed': beds,
    })

    # Amenity flags: True when the badge text mentions the amenity.
    for flag, badge in (('newly_built', 'Newly Built'),
                        ('serviced', 'Serviced'),
                        ('furnished', 'Furnished')):
        listings[flag] = listings['furnish'].apply(lambda text, badge=badge: badge in text)
    listings = listings.drop(columns='furnish')

    # Room counts: unmatched or empty captures become 0.
    counts = listings['bed'].str.extract(r'(\d+)\s*beds?\s*(\d*)\s*baths?\s*(\d*)\s*Toilets?')
    for position, column in enumerate(('beds', 'baths', 'toilets')):
        listings[column] = pd.to_numeric(counts[position], errors='coerce').fillna(0).astype(int)

    # Price: drop the currency sign and thousands separators, keep the first
    # run of digits.
    without_sign = listings['price'].str.replace('₦', '')
    listings['price_₦'] = pd.to_numeric(without_sign.str.replace(',', '').str.extract(r'(\d+)')[0])
    listings = listings.drop(columns=['price', 'bed'])

    # Dates are parsed strictly here (no errors='coerce').
    posted_text = listings['date_post'].str.extract(r'Added (\d{2} \w{3} \d{4})', expand=False)
    updated_text = listings['date_post'].str.extract(r'Updated (\d{2} \w{3} \d{4})', expand=False)
    listings['date_posted'] = pd.to_datetime(posted_text, format='%d %b %Y')
    listings['date_updated'] = pd.to_datetime(updated_text, format='%d %b %Y')
    # NOTE(review): filling a datetime column with a string presumably turns
    # it into a mixed-type column — confirm downstream readers expect that.
    listings['date_updated'] = listings['date_updated'].fillna("not updated")
    listings = listings.drop(columns='date_post')

    # The last word of the address is taken as the state.
    listings['state'] = listings['address'].str.split().str[-1]
    listings.to_csv('propertypro_for_sale.csv', index=False)
    

# Scrape every page on a pool of 20 threads. Each future is inspected so that
# a page that raised is reported instead of being dropped silently.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    pending = {executor.submit(extract_data, page_url): page_url for page_url in url}
    for finished in concurrent.futures.as_completed(pending):
        error = finished.exception()
        if error is not None:
            print(f'failed: {pending[finished]} -> {error!r}')

transform_data()


began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=0
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=1
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=2
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=3
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=4
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=5
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=6
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=7
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=8
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=9
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=10
began
https://www.propertypro.ng/property-for-sale?sort=postedOn&order=desc&page=11
be

## For Rent

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import concurrent.futures

# One search-results URL per page number 0..423.
url = [f'https://www.propertypro.ng/property-for-rent?sort=postedOn&order=desc&page={i:d}' for i in range(424)]

# Column lists filled by extract_data() and combined into a DataFrame by
# transform_data().
titles, types, locations = [], [], []
prices, date_posted, PIDs = [], [], []
furnished, beds, agents = [], [], []


import threading

# extract_data() runs on many worker threads at once. The lock makes the nine
# appends for one listing a single unit, so index i of every column list
# always refers to the same listing.
_ROW_LOCK = threading.Lock()


def extract_data(url):
    """Scrape one for-rent results page and append its listings to the column lists.

    Every listing card on the page contributes exactly one entry to each of
    the module-level lists ``titles``, ``types``, ``locations``, ``prices``,
    ``date_posted``, ``PIDs``, ``furnished``, ``beds`` and ``agents``. A
    missing element is recorded with its placeholder ('No title', 'No type',
    'No location', 'No price', 'No date', 'No PID', '0', 'No beds',
    'No agent') rather than being skipped.

    Args:
        url: Address of a single search-results page.

    Returns:
        None. Results are delivered through the module-level lists.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or does not answer within 30 seconds.
    """
    print('began')
    # Without a timeout a stalled connection would block its worker forever.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, "html.parser")
    house_box = soup.find_all('div', class_="single-room-sale listings-property")

    def text_of(tag, default):
        """Return the tag's text, or ``default`` when the tag is absent."""
        return tag.text if tag is not None else default

    for house in house_box:
        title = text_of(house.find('h3', class_="listings-property-title2"), 'No title')
        category = text_of(house.find('h4', class_="listings-property-title"), 'No type')

        # The address is read from the second <h4>; a card with fewer
        # headings gets the placeholder instead of raising IndexError.
        headings = house.find_all('h4')
        location = headings[1].text if len(headings) > 1 else 'No location'

        price = text_of(house.find('h3', class_="listings-price"), 'No price')
        date = text_of(house.find('h5'), 'No date')

        pid_tag = house.find('h2')
        pid = pid_tag.text.replace('PID:', '') if pid_tag is not None else 'No PID'

        # furnished / serviced / newly built badges
        furnish = text_of(house.find('div', class_="furnished-btn"), '0')

        # bed / bath / toilet summary, flattened onto one line
        bed_tag = house.find('div', class_="fur-areea")
        bed = bed_tag.text.replace('\n', ' ').strip() if bed_tag is not None else 'No beds'

        # The agent slug comes from the link inside the badge; the link or
        # its href may be missing.
        agent = 'No agent'
        agent_box = house.find('div', class_="elite-icon")
        if agent_box is not None and agent_box.a is not None:
            href = agent_box.a.get('href')
            if href:
                agent = href.replace('/agent/', '')

        # Publish the whole listing in one step so concurrent workers cannot
        # interleave their fields across the lists.
        with _ROW_LOCK:
            titles.append(title)
            types.append(category)
            locations.append(location)
            prices.append(price)
            date_posted.append(date)
            PIDs.append(pid)
            furnished.append(furnish)
            beds.append(bed)
            agents.append(agent)


def transform_data():
    """Build the for-rent table from the scraped lists, save it and return it.

    Reads the module-level column lists, derives the amenity flags, the
    bed/bath/toilet counts, the numeric price, the posted/updated dates and
    the state, writes everything to 'propertypro_for_rent.csv' and returns
    the resulting DataFrame.
    """
    listings = pd.DataFrame({
        'title': titles,
        'categories': types,
        'address': locations,
        'agent': agents,
        'price': prices,
        'date_post': date_posted,
        'PIDs': PIDs,
        'furnish': furnished,
        'bed': beds,
    })

    # Amenity flags: True when the badge text mentions the amenity.
    for flag, badge in (('newly_built', 'Newly Built'),
                        ('serviced', 'Serviced'),
                        ('furnished', 'Furnished')):
        listings[flag] = listings['furnish'].apply(lambda text, badge=badge: badge in text)
    listings = listings.drop(columns='furnish')

    # Room counts: unmatched or empty captures become 0.
    counts = listings['bed'].str.extract(r'(\d+)\s*beds?\s*(\d*)\s*baths?\s*(\d*)\s*Toilets?')
    for position, column in enumerate(('beds', 'baths', 'toilets')):
        listings[column] = pd.to_numeric(counts[position], errors='coerce').fillna(0).astype(int)

    # Price: drop the currency sign and thousands separators, keep the first
    # run of digits.
    without_sign = listings['price'].str.replace('₦', '')
    listings['price_₦_yearly'] = pd.to_numeric(without_sign.str.replace(',', '').str.extract(r'(\d+)')[0])
    listings = listings.drop(columns=['price', 'bed'])

    # Dates are parsed strictly here (no errors='coerce').
    posted_text = listings['date_post'].str.extract(r'Added (\d{2} \w{3} \d{4})', expand=False)
    updated_text = listings['date_post'].str.extract(r'Updated (\d{2} \w{3} \d{4})', expand=False)
    listings['date_posted'] = pd.to_datetime(posted_text, format='%d %b %Y')
    listings['date_updated'] = pd.to_datetime(updated_text, format='%d %b %Y')
    # NOTE(review): filling a datetime column with a string presumably turns
    # it into a mixed-type column — confirm downstream readers expect that.
    listings['date_updated'] = listings['date_updated'].fillna("not updated")
    listings = listings.drop(columns='date_post')

    # The last word of the address is taken as the state.
    listings['state'] = listings['address'].str.split().str[-1]
    listings.to_csv('propertypro_for_rent.csv', index=False)
    return listings
    

# Scrape every page on a pool of 20 threads. Each future is inspected so that
# a page that raised is reported instead of being dropped silently.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    pending = {executor.submit(extract_data, page_url): page_url for page_url in url}
    for finished in concurrent.futures.as_completed(pending):
        error = finished.exception()
        if error is not None:
            print(f'failed: {pending[finished]} -> {error!r}')

transform_data()


## Agents

In [21]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import concurrent.futures
import datetime
# '_YYYY-MM-DD' suffix used in the name of the CSV written by transform_data().
today = f'_{datetime.date.today()}'

# One agents-directory URL per page number 0..2652.
url = [f'https://www.propertypro.ng/agents?page={i:d}' for i in range(2653)]

# Column lists filled by extract_data().
names, locations, registered = [], [], []



import threading

# extract_data() runs on many worker threads at once. The lock makes the
# three appends for one agent a single unit, so index i of every column list
# always refers to the same agent.
_ROW_LOCK = threading.Lock()


def extract_data(url):
    """Scrape one agents-directory page and append its agents to the column lists.

    Every agent card on the page contributes exactly one entry to each of the
    module-level lists ``names``, ``locations`` and ``registered``. Missing
    pieces are recorded as 'No name' / 'None' / 'None'.

    Args:
        url: Address of a single agents-directory page.

    Returns:
        None. Results are delivered through the module-level lists.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or does not answer within 30 seconds.
    """
    print('began')
    print(url)
    # Without a timeout a stalled connection would block its worker forever.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, "html.parser")
    house_box = soup.find_all('div', class_="agent-rp-inner")
    for house in house_box:
        name, locate, register = 'No name', 'None', 'None'

        # All three fields live in the same text box, so look it up once.
        details = house.find('div', class_="rp-left-text")
        if details is not None:
            if details.h2 is not None:
                name = details.h2.text.replace('\n', '')
            if details.h4 is not None:
                locate = details.h4.text

            # Keep everything from 'Registered on' onwards. str.find returns
            # -1 when the marker is absent, and slicing from -1 would keep
            # only the last character, so that case keeps the placeholder.
            flat_text = details.text.replace('\n', '')
            start = flat_text.find('Registered on')
            if start != -1:
                register = flat_text[start:]

        # Publish the whole agent in one step so concurrent workers cannot
        # interleave their fields across the lists.
        with _ROW_LOCK:
            names.append(name)
            locations.append(locate)
            registered.append(register)

def transform_data():
    """Build the agents table from the scraped lists and save it as CSV.

    Strips the 'Registered on ' prefix and every occurrence of 'agent' from
    the registration text, then writes the table to a CSV whose name ends
    with the module-level ``today`` suffix. Nothing is returned.
    """
    agents_table = pd.DataFrame({
        'name': names,
        'located': locations,
        'registered_on': registered,
    })
    without_prefix = agents_table['registered_on'].str.replace('Registered on ', '')
    agents_table['registered_on'] = without_prefix.str.replace('agent', '')
    agents_table.to_csv(f'../Real_Estate_data_pipeline/property_csv/agents{today}.csv', index=False)

# Scrape every page on a pool of 20 threads. Each future is inspected so that
# a page that raised is reported instead of being dropped silently.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    pending = {executor.submit(extract_data, page_url): page_url for page_url in url}
    for finished in concurrent.futures.as_completed(pending):
        error = finished.exception()
        if error is not None:
            print(f'failed: {pending[finished]} -> {error!r}')

transform_data()

began
https://www.propertypro.ng/agents?page=0
began
https://www.propertypro.ng/agents?page=1
began
https://www.propertypro.ng/agents?page=2
began
https://www.propertypro.ng/agents?page=3
began
https://www.propertypro.ng/agents?page=4
began
https://www.propertypro.ng/agents?page=5
began
https://www.propertypro.ng/agents?page=6
began
https://www.propertypro.ng/agents?page=7
began
https://www.propertypro.ng/agents?page=8
began
https://www.propertypro.ng/agents?page=9
began
https://www.propertypro.ng/agents?page=10
began
https://www.propertypro.ng/agents?page=11
began
https://www.propertypro.ng/agents?page=12
began
https://www.propertypro.ng/agents?page=13
began
https://www.propertypro.ng/agents?page=14
began
https://www.propertypro.ng/agents?page=15
began
https://www.propertypro.ng/agents?page=16
began
https://www.propertypro.ng/agents?page=17
began
https://www.propertypro.ng/agents?page=18
began
https://www.propertypro.ng/agents?page=19
began
https://www.propertypro.ng/agents?page=20
be