In [1]:
from bs4 import BeautifulSoup, Tag
import requests
from markdownify import markdownify as md
import json
import re
import os
import time
from tqdm import tqdm

In [2]:
# Page listing average rental prices for flats/apartments in Lagos.
base_url = 'https://nigeriapropertycentre.com/market-trends/average-prices/for-rent/flats-apartments/lagos'
# Directory (relative to the notebook's working directory) where JSON results are written.
path = '../data_fetched/prices'
delay = 10 # seconds
# Parser backend handed to BeautifulSoup.
bs_parser = 'html.parser'

# makedirs creates missing parent directories too, and exist_ok=True makes the
# cell safe to re-run (os.mkdir would raise if '../data_fetched' did not exist).
os.makedirs(path, exist_ok=True)

In [3]:
def fetch_page(url, timeout=30):
    """Download ``url`` with an HTTP GET and return the body as text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with status 200, otherwise
        ``None``. A diagnostic message is printed on every failure path.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Connection problems, timeouts, invalid URLs, etc. are reported
        # rather than raised so the caller can simply test for None.
        print(f"failed to fetch response from {url} with error {e}")
        return None

    if response.status_code != 200:
        print(f"failed to fetch response from {url} with status code {response.status_code}")
        return None

    return response.text

In [4]:
# def extract_table(html_content):
#     data = {}
#     soup = BeautifulSoup(html_content, bs_parser)

#     # Extract the title
#     title = soup.find('h1', class_='page-title text-center')
#     if title:
#         title = title.get_text(strip=True)
#     else:
#         title = 'No title found'
#     data['title'] = title

#     # Extract the table
#     table_section = soup.find('table', class_='google-visualization-table-table')
#     if isinstance(table_section, Tag):
#         table = table_section.find_all('tr', class_='google-visualization-table-tr-even')
#     else:
#         table = []
#     data['table'] = table

#     return data


def extract_table(html_content):
    """Extract the page title and the price table from an HTML document.

    Args:
        html_content: HTML markup of the page as a string.

    Returns:
        A dict with two keys:
        ``'title'`` - text of the ``<h1>`` heading, or ``'No title found'``
        when the heading is absent;
        ``'table'`` - a list of rows, each a list of cell strings. The header
        row (when present) comes first, followed by the data rows in document
        order. The list is empty when the table is not found.
    """
    soup = BeautifulSoup(html_content, bs_parser)

    # A multi-class string is compared against the exact value of the class
    # attribute, so this only matches 'page-title text-center' in that order.
    title_tag = soup.find('h1', class_='page-title text-center')
    title = title_tag.get_text(strip=True) if title_tag else 'No title found'

    table_data = []
    table_section = soup.find('table', class_='google-visualization-table-table')

    if isinstance(table_section, Tag):
        # Header cells, if the table has a header row.
        header_row = table_section.find('tr', class_='google-visualization-table-tr-head')
        if isinstance(header_row, Tag):
            table_data.append([th.get_text(strip=True) for th in header_row.find_all('th')])

        # Accept both row classes: matching only '-tr-even' would silently
        # drop any rows rendered with the '-tr-odd' class.
        row_classes = [
            'google-visualization-table-tr-even',
            'google-visualization-table-tr-odd',
        ]
        for row in table_section.find_all('tr', class_=row_classes):
            table_data.append([td.get_text(strip=True) for td in row.find_all('td')])

    return {'title': title, 'table': table_data}


In [5]:
# def convert_to_json(data):
#     json_data = json.dumps(data, indent=4)
#     return json_data

In [6]:
def save_json(data, filename, directory=None):
    """Serialise ``data`` as JSON into ``<directory>/<filename>.json``.

    Args:
        data: Any JSON-serialisable object.
        filename: Base name for the file, without extension. Surrounding
            whitespace is stripped and spaces become underscores; characters
            that are unsafe in file names (path separators, ``: * ? " < > |``
            and control characters) are also replaced with underscores.
        directory: Folder to write into. Defaults to the module-level
            ``path`` when omitted. The folder is created if missing.

    Raises:
        TypeError: If ``data`` is not JSON-serialisable (from ``json.dump``).
        OSError: If the directory or file cannot be created.
    """
    target_dir = path if directory is None else directory
    os.makedirs(target_dir, exist_ok=True)

    safe_name = filename.strip().replace(' ', '_')
    # The name usually comes from a scraped page title, so neutralise anything
    # that would change the target path or be rejected by the filesystem.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', safe_name)

    with open(os.path.join(target_dir, f'{safe_name}.json'), 'w', encoding='utf-8') as f:
        json.dump(data, f)

In [7]:
def main():
    """Fetch the configured page, extract its title and table, and save them.

    The extracted title doubles as the output file name. Progress and the
    extracted data are printed; nothing is returned.
    """
    page_html = fetch_page(base_url)

    # Nothing to parse when the download failed or came back empty.
    if not page_html:
        print(f"Failed to fetch {base_url}")
        return

    result = extract_table(page_html)
    title = result['title']
    save_json(result, title)

    print(f'{title} has been saved to {path}')
    print(f'Extracted data: {result}')


if __name__ == '__main__':
    main()

Average Price of Flats for Rent in Lagos has been saved to ../data_fetched/prices
Extracted data: {'title': 'Average Price of Flats for Rent in Lagos', 'table': [['Area', 'Average\nPrice', 'Max. Property\nPrice', 'Min. Property\nPrice', 'Total Property\nCount', 'New Property\nAdded'], ['Eko Atlantic City', '₦42,730,000', '₦500,000,000', '₦3,000,000', '22', '5'], ['Ikoyi', '₦17,870,000', '₦10,000,000,000', '₦275,000', '3,560', '593'], ['Victoria Island (VI)', '₦8,840,000', '₦14,000,000,000', '₦450,000', '2,898', '474'], ['Lekki', '₦4,490,000', '₦8,500,000,000', '₦65,000', '16,982', '3,398'], ['Ikeja', '₦2,810,000', '₦3,000,000,000', '₦180,000', '1,646', '349'], ['Magodo', '₦2,700,000', '₦10,000,000', '₦400,000', '981', '206'], ['Gbagada', '₦2,670,000', '₦75,000,000', '₦70,000', '931', '199'], ['Maryland', '₦2,670,000', '₦45,000,000', '₦250,000', '321', '72'], ['Surulere', '₦2,350,000', '₦325,000,000', '₦300,000', '877', '133'], ['Ilupeju', '₦2,300,000', '₦6,000,000', '₦600,000', '98', '