In [1]:
from bs4 import BeautifulSoup, Tag
import requests
from markdownify import markdownify as md
import json
import re
import os
import time
from tqdm import tqdm

In [2]:
# Notebook configuration: everything a reader might want to tune lives here.

# Page whose average-price table is scraped.
base_url = 'https://nigeriapropertycentre.com/market-trends/average-prices/for-rent/flats-apartments/rivers/port-harcourt'
# Directory (relative to the notebook's working directory) receiving the JSON output.
path = '../data_fetched/prices'
delay = 10 # seconds
# Parser backend handed to BeautifulSoup.
bs_parser = 'html.parser'

# makedirs also creates missing parent directories and is a no-op when the
# directory already exists, so this cell is safe on a fresh checkout and on re-runs.
os.makedirs(path, exist_ok=True)

In [3]:
def fetch_page(url, timeout=30):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection, which would hang the whole notebook.

    Returns:
        The response text when the server answers with status code 200,
        otherwise ``None``. Failures are reported on stdout instead of being
        raised, so callers only need to check for a falsy result.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.text
        print(f"failed to fetch response from {url} with status code {response.status_code}")
    except Exception as e:
        # Best-effort fetch: report the problem and let the caller decide what to do.
        print(f"failed to fetch response from {url} with error {e}")
    return None

In [4]:
# def extract_table(html_content):
#     data = {}
#     soup = BeautifulSoup(html_content, bs_parser)

#     # Extract the title
#     title_tag = soup.find('h1', class_='page-title text-center')
#     if title_tag:
#         title = title_tag.get_text(strip=True)
#     else:
#         title = 'No title found'
#     data['title'] = title

#     # Extract the table
#     table_section = soup.find('table', class_='google-visualization-table-table')
#     table_data = []
#     if isinstance(table_section, Tag):
#         rows = table_section.find_all('tr', class_='google-visualization-table-tr-even')
#         for row in rows:
#             cols = row.find_all('td')
#             cols_data = [col.get_text(strip=True) for col in cols]
#             table_data.append(cols_data)
#     else:
#         table_data = []
#     data['table'] = table_data

#     return data

def extract_table(html_content):
    """Pull the page title and the price table out of an HTML document.

    Args:
        html_content: HTML markup of the page, as a string.

    Returns:
        A dict with two keys:
        ``'title'`` - text of the ``h1`` page-title heading, or
        ``'No title found'`` when that heading is absent;
        ``'table'`` - a list of rows, each a list of cell strings. The header
        row comes first when one is found. The list is empty when the page
        contains no matching table.
    """
    soup = BeautifulSoup(html_content, bs_parser)

    # Title: fall back to a placeholder so the result always carries one.
    heading = soup.find('h1', class_='page-title text-center')
    title = heading.get_text(strip=True) if heading else 'No title found'

    # Table: stays empty unless the expected <table> element is present.
    table_data = []
    table_section = soup.find('table', class_='google-visualization-table-table')
    if isinstance(table_section, Tag):
        header_row = table_section.find('tr', class_='google-visualization-table-tr-head')
        if isinstance(header_row, Tag):
            table_data.append(
                [cell.get_text(strip=True) for cell in header_row.find_all('th')]
            )

        body_rows = table_section.find_all('tr', class_='google-visualization-table-tr-even')
        table_data.extend(
            [cell.get_text(strip=True) for cell in body_row.find_all('td')]
            for body_row in body_rows
        )

    return {'title': title, 'table': table_data}

In [5]:
def save_to_json(data, filename):
    """Serialize ``data`` as JSON into ``<path>/<filename>.json``.

    The file name is derived from ``filename`` (typically a scraped page
    title): surrounding whitespace is stripped, spaces become underscores and
    characters that are path separators or invalid on common filesystems are
    replaced by underscores, so an unexpected title cannot redirect or break
    the write.

    Args:
        data: JSON-serializable object to store.
        filename: Base name of the output file, without extension.
    """
    base_name = filename.strip().replace(' ', '_')
    # Neutralize path separators, Windows-reserved characters and control bytes.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', base_name) or 'untitled'

    # Make sure the target directory is there even if the setup cell was skipped.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, f'{safe_name}.json'), 'w', encoding='utf-8') as f:
        json.dump(data, f)

In [6]:
def main():
    """Fetch the configured page, extract its table, and store it as JSON.

    The output file is named after the extracted page title. Progress and the
    extracted data are printed to stdout; a fetch that yields no content is
    reported and nothing is written.
    """
    html_content = fetch_page(base_url)
    if not html_content:
        print(f"Failed to fetch {base_url}")
        return

    extracted_data = extract_table(html_content)
    filename = extracted_data.get('title')
    save_to_json(extracted_data, filename)

    print(f'{filename} has been saved to {path}')
    print(extracted_data)


if __name__ == '__main__':
    main()

Average Price of Flats for Rent in Port Harcourt, Rivers has been saved to ../data_fetched/prices
{'title': 'Average Price of Flats for Rent in Port Harcourt, Rivers', 'table': [['Month', 'Average\nPrice', 'Max. Property\nPrice', 'Min. Property\nPrice', 'Total Property\nCount', 'New Property\nAdded'], ['Jul 2024', '₦1,470,000', '₦30,000,000', '₦120,000', '188', '38'], ['Jun 2024', '₦1,440,000', '₦30,000,000', '₦200,000', '192', '30'], ['May 2024', '₦1,560,000', '₦30,000,000', '₦200,000', '207', '28'], ['Apr 2024', '₦1,360,000', '₦30,000,000', '₦200,000', '215', '28'], ['Mar 2024', '₦1,330,000', '₦30,000,000', '₦300,000', '247', '31'], ['Feb 2024', '₦1,350,000', '₦30,000,000', '₦280,000', '257', '35'], ['Jan 2024', '₦1,360,000', '₦11,000,000', '₦280,000', '268', '41'], ['Dec 2023', '₦1,330,000', '₦11,000,000', '₦280,000', '266', '44'], ['Nov 2023', '₦1,260,000', '₦11,000,000', '₦280,000', '275', '36'], ['Oct 2023', '₦1,240,000', '₦1,700,000,000', '₦280,000', '294', '61'], ['Sep 2023', '