In [1]:
import json
import re

In [2]:
# Function to clean HTML tags from a string and ensure proper spacing
def cleanHTML(html):
    """Strip HTML markup from ``html`` and return it as one tidy line of text.

    Processing order:
      1. ``<br>``, ``<br/>`` and ``<br />`` (in any letter case) become a
         newline so the words on either side stay separated once tags are
         stripped.
      2. Every remaining ``<...>`` tag is deleted.
      3. Backslashes are deleted.
      4. A fixed set of mojibake sequences is mapped back to the intended
         punctuation.
      5. Runs of whitespace (including the newlines from step 1) collapse to
         a single space and the ends are trimmed.

    Args:
        html: Text that may contain HTML tags.

    Returns:
        The cleaned, single-line string.
    """
    # HTML tag names are case-insensitive. Without IGNORECASE an upper-case
    # <BR> would only be caught by the generic tag stripper below, which
    # inserts no separator and would glue the neighbouring words together.
    html = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)

    # Remove remaining HTML tags
    html = re.sub(r"</?[^>]+>", "", html)

    # Remove backslashes
    html = re.sub(r"\\", "", html)

    # Order matters: the bare two-character prefix is the fallback and must
    # come last, otherwise it would eat the start of the longer sequences.
    replacements = (
        ("â€™", "'"),  # incorrectly decoded apostrophe
        ("â€“", "–"),  # en-dash
        ("â€œ", '"'),  # opening double quote
        # A mis-decoded closing double quote usually ends in the unmapped
        # control character U+009D; consume it together with the prefix so it
        # does not linger in the output.
        ("â€\u009d", '"'),
        ("â€", '"'),  # closing double quote whose last byte was dropped
    )
    for broken, fixed in replacements:
        html = html.replace(broken, fixed)

    # Remove extra spaces and trim the result
    return re.sub(r"\s+", " ", html).strip()

In [3]:
def clean_pharmacy_data(pharmacy_data):
    """Turn HTML line breaks in each entry's ``"address"`` into newlines.

    Only the ``"address"`` key is touched. Entries that lack the key, or
    whose address is not a string (for example ``None`` coming from a JSON
    ``null``), are left exactly as they are instead of raising.

    Args:
        pharmacy_data: Iterable of entry mappings. The entries are modified
            in place.

    Returns:
        The same ``pharmacy_data`` object that was passed in.
    """
    for item in pharmacy_data:
        # re.sub rejects non-string input, so skip anything that is not text.
        if "address" in item and isinstance(item["address"], str):
            # IGNORECASE so <BR> / <Br/> are treated like <br>.
            item["address"] = re.sub(
                r"<br\s*/?>", "\n", item["address"], flags=re.IGNORECASE
            )
    return pharmacy_data


def extract_pharmacy_data(js_file_path):
    """Load the ``pharmacyData`` array from a JavaScript file.

    The file is read as UTF-8 and searched for the first
    ``pharmacyData = [ ... ];`` assignment. The bracketed text is parsed as
    JSON and passed through ``clean_pharmacy_data``.

    Args:
        js_file_path: Path of the JavaScript file to read.

    Returns:
        The cleaned list on success. ``None`` (after printing a message) when
        no assignment is found or the array text is not valid JSON.
    """
    with open(js_file_path, encoding="utf-8") as js_source:
        source_text = js_source.read()

    # Non-greedy capture: stops at the first "];" after the opening bracket.
    found = re.search(r"pharmacyData\s*=\s*(\[[\s\S]*?\]);", source_text)
    if found is None:
        print("No pharmacyData found in the JavaScript file.")
        return None

    pharmacy_data_str = found.group(1)
    try:
        # Parse the array text, then normalise the entries.
        return clean_pharmacy_data(json.loads(pharmacy_data_str))
    except json.JSONDecodeError as decode_error:
        print(f"JSON decode error: {decode_error}")
        print("Please inspect the pharmacy_data_str for invalid JSON formatting.")
        return None


# Example usage
# The path is relative, so it is resolved against the current working directory.
js_file_path = "IQuit.js"  # Path to your JavaScript file
# NOTE: extract_pharmacy_data prints a message and returns None when the
# pharmacyData assignment is missing or is not valid JSON, so pharmacy_data
# can be None after this line.
pharmacy_data = extract_pharmacy_data(js_file_path)

In [4]:
# Organize data by service and location


def organize_pharmacy_data(data):
    """Group pharmacy entries into ``{service: {location: [centre, ...]}}``.

    Each entry must provide the keys ``service``, ``location``, ``address``,
    ``name``, ``tel`` and ``map``. The address is passed through
    ``cleanHTML`` before it is stored; the other values are copied as-is.

    Args:
        data: Iterable of entry mappings.

    Returns:
        A nested dict keyed by service, then by location, whose values are
        lists of centre dicts in input order.
    """
    by_service = {}
    for record in data:
        service_name = record["service"]
        area = record["location"]
        cleaned_address = cleanHTML(record["address"])

        # Create the service and location buckets on first sight, then reuse.
        centres = by_service.setdefault(service_name, {}).setdefault(area, [])
        centres.append(
            {
                "name": record["name"],
                "location": area,
                "address": cleaned_address,
                "tel": record["tel"],
                "map": record["map"],
            }
        )

    return by_service


# Group the extracted entries by service and then by location.
# NOTE: organize_pharmacy_data iterates over its argument, so this raises
# TypeError if the extraction step returned None.
organized_pharmacy_data = organize_pharmacy_data(pharmacy_data)

In [5]:
def format_pharmacy_data(data):
    """Render the nested service/location/centre mapping as plain text.

    For every (service, location) pair a heading line is written, followed by
    one five-line record (Name, Location, Address, Tel, Map) per centre, each
    record ending with a blank line.

    Args:
        data: Mapping of service -> mapping of location -> list of centre
            dicts with the keys ``name``, ``location``, ``address``, ``tel``
            and ``map``.

    Returns:
        The concatenated text; an empty string when ``data`` is empty.
    """
    pieces = []
    for service, locations in data.items():
        for location, centres in locations.items():
            pieces.append(
                f"These are the information for the centres for the {service} service in the {location}:\n"
            )
            # One multi-line chunk per centre, in list order.
            pieces.extend(
                f"Name: {centre['name']}\n"
                f"Location: {centre['location']}\n"
                f"Address: {centre['address']}\n"
                f"Tel: {centre['tel']}\n"
                f"Map: {centre['map']}\n\n"
                for centre in centres
            )
    return "".join(pieces)


# Render the grouped centres as plain text: a heading line per
# (service, location) pair followed by one record per centre.
formatted_data = format_pharmacy_data(organized_pharmacy_data)

# Hand-written introduction that goes ahead of the generated listing. It is a
# single line ending in a newline; the ** and * markers are part of the text
# and are emitted verbatim.
supplementary_text = """This article provides information about help centres and pharmacies that offer support to help people quit smoking. There are two main services available: the **Start to S.T.O.P. Centre** and **I Quit Touchpoint**. Both are designed to give you the guidance and tools necessary to successfully quit smoking. Quitting smoking is a big step towards improving your health, and the I Quit Programme is here to assist you on this journey. At participating pharmacies, you can sign up for the I Quit Programme and receive a personalized quit plan tailored to help you break free from smoking. This program also offers face-to-face counselling (subject to prevailing social management measures) under the **Start to S.T.O.P. (Speak to Our Pharmacists)** initiative, providing the guidance and motivation you need to quit successfully. Additionally, free counselling sessions are available at HPB’s **I Quit Touchpoint Centres** in selected stores*. Learn more about the Start to S.T.O.P. programme, championed by the Pharmaceutical Society of Singapore, available at selected retail pharmacies (*Guardian and Unity*). These resources are dedicated to supporting your efforts to quit smoking for good. Below are the various services across the five locations in Singapore: the West, East, North East, North, and Central.\n"""

# Combine the supplementary text with the formatted pharmacy data
final_content = supplementary_text + formatted_data

In [6]:
# Define the JSON data structure: a one-element list holding the article
# record, with the assembled text stored under "content".
json_data = [
    {
        "id": "1434716_content_js",
        "title": "I Quit Programme",
        "cover_image_url": "https://ch-api.healthhub.sg/api/public/content/541d2fcc81a74f3cadfec14c34da1245?v=df4a9659",
        "full_url": "https://www.healthhub.sg/programmes/IQuit",
        "content_category": "programs",
        "category_description": "Stay smoke-free for 28 days and you're 5 times more likely to quit for good.",
        "pr_name": "Health Promotion Board",
        "date_modified": None,  # serialised as JSON null
        "content": final_content,
    }
]

# Write the JSON data to a file.
# The encoding is pinned to UTF-8 so the output does not depend on the
# platform's locale default; the input file is read as UTF-8 as well.
# json.dump keeps its default ensure_ascii=True, so non-ASCII characters are
# still written as \uXXXX escapes exactly as before.
with open("1434716_content_js.json", "w", encoding="utf-8") as json_file:
    json.dump(json_data, json_file, indent=4)

# Write the formatted text content to a .txt file.
# final_content contains non-ASCII punctuation (curly apostrophe, en-dash),
# so an explicit UTF-8 encoding keeps the bytes identical on every OS and
# avoids UnicodeEncodeError under locales that cannot represent them.
with open("IQuit.txt", "w", encoding="utf-8") as txt_file:
    txt_file.write(final_content)