In [7]:
from bs4 import BeautifulSoup, Tag
import requests
from markdownify import markdownify as md
import json
import re
import os
import time
from tqdm import tqdm

In [8]:
# Page that is scraped for the price table.
base_url = 'https://nigeriapropertycentre.com/market-trends/average-prices/for-rent/flats-apartments/abuja'
# Directory (relative to the notebook's working directory) that receives the JSON output.
path = '../data_fetched/prices'
delay = 10 # seconds
# Parser name handed to BeautifulSoup.
bs_parser = 'html.parser'

# makedirs also creates missing parent directories (os.mkdir raises
# FileNotFoundError when '../data_fetched' is absent), and exist_ok=True makes
# re-running this cell a no-op instead of an error.
os.makedirs(path, exist_ok=True)

In [9]:
def fetch_page(url, timeout=30):
    """Download ``url`` with an HTTP GET and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook on an unresponsive host.

    Returns:
        The response text when the status code is 200, otherwise ``None``.
        Failures are reported with ``print`` rather than raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors, timeouts, invalid URLs, etc. all derive from
        # RequestException; keep the best-effort "print and return None" contract.
        print(f"Error fetching {url}: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch {url} with status code {response.status_code}")
        return None
    return response.text


In [10]:
def extract_table(html_content):
    """Parse a page and pull out its title and price table.

    Args:
        html_content: HTML source of the page as a string.

    Returns:
        A dict with two keys:
        ``'title'`` - text of the ``<h1 class="page-title text-center">``
        element, or ``'No title found'`` when it is absent;
        ``'table'`` - a list of rows (each a list of cell strings). The header
        row comes first when present; the list is empty when the table is
        not found.
    """
    soup = BeautifulSoup(html_content, bs_parser)

    # Extract the title
    title_tag = soup.find('h1', class_='page-title text-center')
    title = title_tag.get_text(strip=True) if title_tag else 'No title found'

    # Extract the table
    table_data = []
    table_section = soup.find('table', class_='google-visualization-table-table')
    if isinstance(table_section, Tag):
        # Extract table headers
        header_row = table_section.find('tr', class_='google-visualization-table-tr-head')
        if isinstance(header_row, Tag):
            table_data.append([th.get_text(strip=True) for th in header_row.find_all('th')])

        # Extract table rows. Google Charts tables can mark alternating body
        # rows with "-tr-odd" instead of "-tr-even"; matching only "-tr-even"
        # would silently drop those rows. A list passed to class_ matches
        # either class, and rows are still returned in document order.
        body_row_classes = [
            'google-visualization-table-tr-even',
            'google-visualization-table-tr-odd',
        ]
        for row in table_section.find_all('tr', class_=body_row_classes):
            table_data.append([col.get_text(strip=True) for col in row.find_all('td')])

    return {'title': title, 'table': table_data}

In [11]:
def save_to_json(data, filename):
    """Write ``data`` as JSON to ``<path>/<filename>.json``.

    Args:
        data: JSON-serialisable object to store.
        filename: Base name for the file, without extension. Surrounding
            whitespace is stripped and spaces become underscores.

    The name normally comes from a scraped page title, so path separators are
    replaced as well; otherwise a title containing ``/`` or ``\\`` would point
    the write at a different (possibly non-existent) directory.
    """
    filename = filename.strip().replace(' ', '_')
    for separator in ('/', '\\'):
        filename = filename.replace(separator, '_')

    # Make sure the target directory is there even if the setup cell was skipped.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, f'{filename}.json'), 'w', encoding='utf-8') as f:
        json.dump(data, f)

In [12]:
def main():
    """Fetch ``base_url``, extract its title and table, save them and print the result.

    When the page cannot be fetched, a failure message is printed and nothing
    is saved.
    """
    page_html = fetch_page(base_url)
    if not page_html:
        print(f"Failed to fetch {base_url}")
        return

    result = extract_table(page_html)
    # The page title doubles as the output file name.
    title = result.get('title')
    save_to_json(result, title)
    print(f'{title} has been saved to {path}')
    print(result)

# Run the scrape when this code is executed directly (as a script or a notebook cell).
if "__main__" == __name__:
    main()

Average Price of Flats for Rent in Abuja has been saved to ../data_fetched/prices
{'title': 'Average Price of Flats for Rent in Abuja', 'table': [['Area', 'Average\nPrice', 'Max. Property\nPrice', 'Min. Property\nPrice', 'Total Property\nCount', 'New Property\nAdded'], ['Maitama District', '₦10,830,000', '₦140,000,000', '₦900,000', '169', '24'], ['Wuse 2', '₦9,580,000', '₦3,000,000,000', '₦55,000', '213', '42'], ['Asokoro District', '₦7,960,000', '₦16,000,000,000', '₦1,000,000', '217', '29'], ['Guzape District', '₦5,620,000', '₦100,000,000', '₦450,000', '210', '37'], ['Jabi', '₦5,230,000', '₦60,000,000', '₦900,000', '271', '87'], ['Kukwaba', '₦4,600,000', '₦9,000,000', '₦1,600,000', '4', '1'], ['Jahi', '₦4,490,000', '₦400,000,000', '₦800,000', '793', '129'], ['Wuye', '₦4,350,000', '₦280,000,000', '₦800,000', '395', '49'], ['Katampe', '₦4,320,000', '₦85,000,000', '₦1,000,000', '593', '95'], ['Utako', '₦3,850,000', '₦66,000,000', '₦1,200,000', '197', '21'], ['Gudu', '₦3,790,000', '₦13,00