## **Data Preparation**

Scrape all relevant data on maternal health care and the reproductive system from the appropriate websites.

World Health Organization

In [4]:
#libraries
import requests
from bs4 import BeautifulSoup
import re
import json
import os
from urllib.parse import urlparse
from scripts.loggingsetup import error_logger, success_logger
print("Libraries imported")

ModuleNotFoundError: No module named 'scripts.loggingsetup'

In [None]:
#function to fetch and save structured data from a web page
def get_data(url, timeout=30):
    """Fetch a page, extract its ``window.__PRELOADED_STATE__`` JSON and save it.

    The page is downloaded, the ``<script>`` tag that assigns
    ``window.__PRELOADED_STATE__`` is located, and the JSON value on the
    right-hand side of that assignment is decoded. The decoded data is written
    to ``<url path with '/' replaced by '_'>.json`` (``index.json`` when the
    URL has no path), relative to the current working directory.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The decoded JSON data, or ``None`` when the page has no preloaded
        state script or when any step fails. Failures are printed and sent to
        ``error_logger`` rather than raised.
    """
    # Match the assignment itself (dot escaped so it is literal) so the script
    # we pick is guaranteed to contain "<marker> = <payload>".
    state_assignment = re.compile(r'window\.__PRELOADED_STATE__\s*=')

    try:
        #get the data from the website; without a timeout a stalled server
        #would block this call forever
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  #raise error if bad response

        #parse the data using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        #find the script tag that contains the data
        #(`string=` replaces the deprecated `text=` keyword)
        script_tag = soup.find('script', string=state_assignment)

        if not script_tag:
            print("No preloaded state script found.")
            return None

        #keep only what follows the marker's own '=' sign, so an earlier '='
        #in the same script cannot be mistaken for the assignment
        script_text = script_tag.string
        assignment = state_assignment.search(script_text)
        payload = script_text[assignment.end():].lstrip()

        #decode the first JSON value and ignore whatever trails it
        #(a ';' or further JavaScript statements)
        data, _ = json.JSONDecoder().raw_decode(payload)

        #generate a file name based on URL path
        path = urlparse(url).path.strip('/')
        file_name = path.replace('/', '_') or 'index'
        file_path = f"{file_name}.json"

        #save to file
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"✅ Data saved to {file_path}")
        success_logger.info("Successfully saved data to file path")
        return data
    except Exception as e:
        #best-effort boundary: report the failure and signal it with None
        #instead of interrupting the notebook run
        print(f"❌ Error processing {url}: {str(e)}")
        error_logger.error(f"Error in processing {url}: {e}")
        return None
        