In [1]:
# Load the Kedro IPython extension. Later cells use `catalog` without defining
# it (hence their `noqa: F821`) — presumably this extension injects it.
%load_ext kedro.ipython

In [2]:
import copy
import re
import unicodedata

import pandas as pd
from bs4 import BeautifulSoup

In [3]:
class HTMLExtractor:
    """
    A class to extract and process various elements from HTML content
    using BeautifulSoup.

    Attributes:
        soup (BeautifulSoup): The parsed HTML content, with every <br> tag
            already replaced by a newline. The extraction methods only read
            from it; `extract_text` works on a private copy.
    """

    # Header tags recognised by `extract_text` and `extract_headers`
    _HEADER_TAGS = ["h1", "h2", "h3", "h4", "h5", "h6"]

    # Paragraphs mentioning any of these phrases are left out of the main text
    _EXCLUDED_PARAGRAPH_PATTERN = re.compile(
        r"(HealthHub app|Google Play|Apple Store|Parent Hub|References)"
    )

    # Characters that must be mapped to whitespace (or dropped) *before* the
    # ASCII filter in `clean_text`, which would otherwise discard them
    _SEPARATOR_TABLE = str.maketrans(
        {
            "\xa0": " ",  # non-breaking space
            "\u200b": "",  # zero-width space
            "\u2028": "\n",  # line separator
            "\u2029": "\n",  # paragraph separator
        }
    )

    def __init__(self, html_content: str):
        """
        Initializes the HTMLExtractor with the given HTML content.

        Args:
            html_content (str): The HTML content to be processed.
        """
        self.soup = self.preprocess_html(html_content)

    @classmethod
    def clean_text(cls, text: str) -> str:
        """
        Cleans the given text by replacing problematic separator characters,
        normalizing Unicode characters, dropping non-ASCII symbols,
        and collapsing multiple whitespace.

        Args:
            text (str): The input text to be cleaned.

        Returns:
            str: The cleaned, ASCII-only text with single spaces between words.
        """
        # Replace separators first: once the text is ASCII-encoded these
        # characters are gone and the words around them would be glued together
        text = text.translate(cls._SEPARATOR_TABLE)
        # Normalize Unicode characters (e.g. accented letters -> letter + mark)
        text = unicodedata.normalize("NFKD", text)
        # Drop everything that is not ASCII, e.g. copyright \xa9
        text = text.encode("ascii", "ignore").decode("ascii")

        # Replace multiple whitespace with single space
        return re.sub(r"\s+", " ", text).strip()

    @classmethod
    def preprocess_html(cls, html_content: str) -> "BeautifulSoup":
        """
        Preprocesses the given HTML content by replacing all <br>
        tags with newline characters.

        Args:
            html_content (str): The HTML content to be preprocessed.

        Returns:
            BeautifulSoup: The preprocessed HTML content as a BeautifulSoup object.
        """
        soup = BeautifulSoup(html_content, "html.parser")

        # Find all <br> tags and replace them with newline
        for br in soup.find_all("br"):
            br.replace_with("\n")

        return soup

    def extract_related_sections(self) -> list[str]:
        """
        Extracts "Related:" sections and "Read these next:" items from the HTML content.

        A <p> containing a <strong> tag and the text "Related:" contributes its
        cleaned text with the "Related: " prefix removed. A <p> containing a
        <strong> tag and the text "Read these next:" marks its next sibling
        <ul>; every <li> of that list contributes its cleaned text.

        Returns:
            list[str]: A list of related sections and "Read these next:" items.
        """
        related_sections = []
        read_these_next_ul = None

        for tag in self.soup.find_all(["p", "ul"]):
            if tag.name == "p" and tag.find("strong"):
                if "Related:" in tag.text:
                    related_sections.append(
                        self.clean_text(tag.text).replace("Related: ", "")
                    )
                elif "Read these next:" in tag.text:
                    read_these_next_ul = tag.find_next_sibling("ul")
            # Identity check: `==` on tags compares name/attributes/contents,
            # so an unrelated list with the same items would match as well
            elif tag is read_these_next_ul:
                related_sections.extend(
                    self.clean_text(li.text) for li in tag.find_all("li")
                )

        return related_sections

    def extract_text(self) -> str:
        """
        Extracts the main content from the HTML content.

        Returns:
            str: The main content extracted from the HTML content.

        Note:
            The extraction runs on a copy of `self.soup`, so calling this method
            does not change what the other extraction methods see and repeated
            calls return the same result.

            The first <div> of the copy is unwrapped, then the tags are visited
            in document order:

                - h1, h2, h3, h4, h5, h6: A newline entry is added before the cleaned header
                  text to provide paragraphing between key headers.
                - p: <em> tags are removed first. Paragraphs mentioning HealthHub app, Google Play,
                  Apple Store, Parent Hub or References are skipped. Of the remaining paragraphs,
                  those containing a <strong> tag are kept only if they contain "Related:" or
                  "Read these next:"; paragraphs without <strong> are always kept.
                - ul: Only lists whose direct parent is a <div> are kept, one "- " item per <li>.
                - ol: Each <li> is kept with a running "1. ", "2. ", ... prefix.
                - div: The cleaned text of the whole div is kept.

            Empty entries are dropped, the rest is joined with newlines, double newlines are
            replaced with single newlines and whitespace is stripped. If the processed text
            is empty, one more <div> is unwrapped and the text of the remaining <div> tags
            is collected.
        """
        # Work on a copy: unwrapping <div> and removing <em> below must not
        # leak into `self.soup`, which the other extractors read
        soup = copy.copy(self.soup)

        # Unwrap if the HTML content is contained in a div
        if soup.div is not None:
            soup.div.unwrap()

        # Extract the main content
        content = []
        for tag in soup.find_all(self._HEADER_TAGS + ["div", "p", "ul", "ol"]):
            if tag.name in self._HEADER_TAGS:
                # Provide paragraphing between key headers
                content.append("\n")
                content.append(self.clean_text(tag.text))

            elif tag.name == "p":
                # Remove all em tags
                for em in tag.find_all("em"):
                    em.extract()
                # Get the remaining text
                text = tag.get_text()
                # Skip paragraphs about HealthHub app, Google Play, Apple Store, etc.
                if self._EXCLUDED_PARAGRAPH_PATTERN.search(text):
                    continue
                if not tag.find("strong"):
                    content.append(self.clean_text(text))
                # NOTE(review): a paragraph with <strong> but neither marker
                # below is dropped entirely — confirm this is intended
                elif "Related:" in text or "Read these next:" in text:
                    content.append(self.clean_text(text))

            # For unordered lists directly inside a div
            elif tag.name == "ul" and tag.parent.name == "div":
                for li in tag.find_all("li"):
                    content.append("- " + self.clean_text(li.text))

            # For ordered lists
            elif tag.name == "ol":
                for position, li in enumerate(tag.find_all("li"), start=1):
                    content.append(f"{position}. " + self.clean_text(li.text))

            # For texts within div
            elif tag.name == "div":
                content.append(self.clean_text(tag.text))

        # Remove empty strings from content
        content = [c for c in content if c]

        # Replace double newlines with single newlines and strip whitespace
        processed_text = "\n".join(content).replace("\n\n", "\n").strip()

        # Edge case - HTML content contained in div tags
        # NOTE(review): every <div> was already visited by the loop above, so
        # this pass does not appear able to recover additional text — confirm
        # the intended fallback before relying on it
        if processed_text == "" and soup.div is not None:
            soup.div.unwrap()
            content = [self.clean_text(tag.text) for tag in soup.find_all("div")]

            # Replace double newlines with single newlines and strip whitespace
            processed_text = "\n".join(content).replace("\n\n", "\n").strip()

        return processed_text

    def extract_links(self) -> list[tuple[str, str]]:
        """
        Extracts the title and URL from all the anchor tags in the HTML content.

        Anchors whose href is exactly "#footnotes" are ignored. The title is the
        anchor's `title` attribute when present and non-empty, otherwise its text.

        Returns:
            list[tuple[str, str]]:
                A list of tuples containing the title and URL of each anchor tag.
                The URL is None for an anchor without an href attribute.
        """
        return [
            (link.get("title") or link.get_text(), link.get("href"))
            for link in self.soup.find_all("a")
            # Ignore footnotes
            if link.get("href") != "#footnotes"
        ]

    def extract_headers(self) -> list[tuple[str, str]]:
        """
        Extracts the headers from the HTML content.

        Headers whose text is exactly "References" are ignored.

        Returns:
            list[tuple[str, str]]:
                A list of tuples containing the text and tag name of
                each header found in the HTML content.
        """
        return [
            (title.get_text(), title.name)
            for title in self.soup.find_all(self._HEADER_TAGS)
            # Ignore References
            if title.get_text() != "References"
        ]

In [4]:
# ruff: noqa: F821
# List the names of the datasets and parameters registered in the catalog
catalog.list()


[1m[[0m
    [32m'all_contents'[0m,
    [32m'all_contents_processed'[0m,
    [32m'all_contents_extracted'[0m,
    [32m'all_extracted_text'[0m,
    [32m'parameters'[0m,
    [32m'params:columns_to_keep'[0m,
    [32m'params:columns_to_keep.cost-and-financing'[0m,
    [32m'params:columns_to_keep.diseases-and-conditions'[0m,
    [32m'params:columns_to_keep.health-statistics'[0m,
    [32m'params:columns_to_keep.live-healthy-articles'[0m,
    [32m'params:columns_to_keep.medical-care-and-facilities'[0m,
    [32m'params:columns_to_keep.medications'[0m,
    [32m'params:columns_to_keep.program-sub-pages'[0m,
    [32m'params:columns_to_keep.programs'[0m,
    [32m'params:columns_to_keep.support-group-and-others'[0m,
    [32m'params:metadata'[0m,
    [32m'params:metadata.cost-and-financing'[0m,
    [32m'params:metadata.cost-and-financing.uuid'[0m,
    [32m'params:metadata.cost-and-financing.content_title'[0m,
    [32m'params:metadata.cost-and-financing.conten

### Load all the contents

In [5]:
# ruff: noqa: F821
# The output below shows a dict mapping each export name to a bound `load`
# method, so a file is only read when that method is called
all_contents = catalog.load("all_contents")
all_contents


[1m{[0m
    [32m'export-published-cost-and-financing_14062024_data'[0m: [1m<[0m[1;95mbound[0m[39m method AbstractVersionedDataset.load of <kedro_datasets.pandas.excel_dataset.ExcelDataset object at [0m[1;36m0x1181a8c50[0m[39m>>,[0m
[39m    [0m[32m'export-published-diseases-and-conditions_13062024_data'[0m[39m: <bound method AbstractVersionedDataset.load of <kedro_datasets.pandas.excel_dataset.ExcelDataset object at [0m[1;36m0x169ad8c20[0m[39m>>,[0m
[39m    [0m[32m'export-published-health-statistics_14062024_data'[0m[39m: <bound method AbstractVersionedDataset.load of <kedro_datasets.pandas.excel_dataset.ExcelDataset object at [0m[1;36m0x1699e0530[0m[39m>>,[0m
[39m    [0m[32m'export-published-live-healthy-articles_14062024_data'[0m[39m: <bound method AbstractVersionedDataset.load of <kedro_datasets.pandas.excel_dataset.ExcelDataset object at [0m[1;36m0x169aee330[0m[39m>>,[0m
[39m    [0m[32m'export-published-medical-care-and-facilities_1406

### Select the content category

In [6]:
# Key used in later cells to pick the matching export, columns and metadata
content_category = "live-healthy-articles"

### Use `content_category` as key to access the corresponding content

In [7]:
# Resolve the export by its category prefix rather than a hard-coded date:
# the exports do not all share the same date suffix
export_prefix = f"export-published-{content_category}_"
matching_exports = [key for key in all_contents if key.startswith(export_prefix)]
if len(matching_exports) != 1:
    raise KeyError(
        f"Expected exactly one export starting with {export_prefix!r}, "
        f"found {matching_exports}"
    )

# Each value is a loader; calling it reads the export into a DataFrame
df = all_contents[matching_exports[0]]()
# Drop all columns which have only null values
df = df.dropna(axis=1, how="all")
print(df.shape)

(1155, 60)


### View all columns to keep for corresponding content category

In [8]:
# ruff: noqa: F821
# Mapping of content category -> list of column names to keep
columns_to_keep = catalog.load("params:columns_to_keep")
columns_to_keep


[1m{[0m
    [32m'cost-and-financing'[0m: [1m[[0m
        [32m'id'[0m,
        [32m'Content.Name'[0m,
        [32m'CostAndFinancing_Title'[0m,
        [32m'CostAndFinancing_ArticleCatNames'[0m,
        [32m'CostAndFinancing_CoverImgUrl'[0m,
        [32m'CostAndFinancing_FullUrl'[0m,
        [32m'CostAndFinancing_FullUrl2'[0m,
        [32m'CostAndFinancing_FriendlyUrl'[0m,
        [32m'CostAndFinancing_CategoryDesc'[0m,
        [32m'CostAndFinancing_ContentBody'[0m,
        [32m'CostAndFinancing_ENKeywords'[0m,
        [32m'CostAndFinancing_FeatureTitle'[0m,
        [32m'CostAndFinancing_PRName'[0m,
        [32m'CostAndFinancing_AlternateImageText'[0m,
        [32m'CostAndFinancing_DateModified'[0m,
        [32m'CostAndFinancing_NumberofViews'[0m,
        [32m'CostAndFinancing_LastMonthViewCount'[0m,
        [32m'CostAndFinancing_LastTwoMonthsView'[0m,
        [32m'Page Views'[0m,
        [32m'Engagement Rate'[0m,
        [32m'Bounce Rate'

### Get columns to keep corresponding to `content_category`

In [9]:
# Columns to keep for the selected content category
relevant_columns = columns_to_keep[content_category]
relevant_columns


[1m[[0m
    [32m'id'[0m,
    [32m'Content.Name'[0m,
    [32m'LiveHealthyArticle_Title'[0m,
    [32m'LiveHealthyArticle_ArticleCateName'[0m,
    [32m'LiveHealthyArticle_CoverImgUrl'[0m,
    [32m'LiveHealthyArticle_FullUrl'[0m,
    [32m'LiveHealthyArticle_FullUrl2'[0m,
    [32m'LiveHealthyArticle_FriendlyUrl'[0m,
    [32m'LiveHealthyArticle_CategoryDes'[0m,
    [32m'LiveHealthyArticle_Content_Body'[0m,
    [32m'LiveHealthyArticle_ENKeywords'[0m,
    [32m'LiveHealthyArticle_FeatureTitle'[0m,
    [32m'LiveHealthyArticle_PRName'[0m,
    [32m'LiveHealthyArticle_AlternateImageText'[0m,
    [32m'LiveHealthyArticle_DateModified'[0m,
    [32m'LiveHealthyArticle_Number_of_View'[0m,
    [32m'LiveHealthyArticle_Lastmonthview'[0m,
    [32m'LiveHealthyArticle_LastTwoMonthsView'[0m,
    [32m'Page Views'[0m,
    [32m'Engagement Rate'[0m,
    [32m'Bounce Rate'[0m,
    [32m'Exit Rate'[0m,
    [32m'Scroll %'[0m,
    [32m'% of Total Views'[0m,
    [32m'Cu

In [10]:
# Keep all relevant columns. Take an explicit copy so that columns added to
# `df` later are written to an independent frame rather than to a selection
# of the original one (avoids pandas' SettingWithCopyWarning)
df = df[relevant_columns].copy()
print(df.shape)

(1155, 25)


In [11]:
# Remaining columns after the selection
df.columns


[1;35mIndex[0m[1m([0m[1m[[0m[32m'id'[0m, [32m'Content.Name'[0m, [32m'LiveHealthyArticle_Title'[0m,
       [32m'LiveHealthyArticle_ArticleCateName'[0m, [32m'LiveHealthyArticle_CoverImgUrl'[0m,
       [32m'LiveHealthyArticle_FullUrl'[0m, [32m'LiveHealthyArticle_FullUrl2'[0m,
       [32m'LiveHealthyArticle_FriendlyUrl'[0m, [32m'LiveHealthyArticle_CategoryDes'[0m,
       [32m'LiveHealthyArticle_Content_Body'[0m, [32m'LiveHealthyArticle_ENKeywords'[0m,
       [32m'LiveHealthyArticle_FeatureTitle'[0m, [32m'LiveHealthyArticle_PRName'[0m,
       [32m'LiveHealthyArticle_AlternateImageText'[0m,
       [32m'LiveHealthyArticle_DateModified'[0m, [32m'LiveHealthyArticle_Number_of_View'[0m,
       [32m'LiveHealthyArticle_Lastmonthview'[0m,
       [32m'LiveHealthyArticle_LastTwoMonthsView'[0m, [32m'Page Views'[0m, [32m'Engagement Rate'[0m,
       [32m'Bounce Rate'[0m, [32m'Exit Rate'[0m, [32m'Scroll %'[0m, [32m'% of Total Views'[0m,
       [32m'C

### Get metadata for corresponding content category

In [12]:
# ruff: noqa: F821
# Per-category mapping of the `uuid`, `content_title` and `content_body`
# roles to the actual column names (see output below)
metadata = catalog.load("params:metadata")
metadata


[1m{[0m
    [32m'cost-and-financing'[0m: [1m{[0m
        [32m'uuid'[0m: [32m'id'[0m,
        [32m'content_title'[0m: [32m'CostAndFinancing_Title'[0m,
        [32m'content_body'[0m: [32m'CostAndFinancing_ContentBody'[0m
    [1m}[0m,
    [32m'diseases-and-conditions'[0m: [1m{[0m
        [32m'uuid'[0m: [32m'id'[0m,
        [32m'content_title'[0m: [32m'DiseasesCondition_Title'[0m,
        [32m'content_body'[0m: [32m'DiseasesCondition_ContentBody'[0m
    [1m}[0m,
    [32m'health-statistics'[0m: [1m{[0m
        [32m'uuid'[0m: [32m'id'[0m,
        [32m'content_title'[0m: [32m'HealthStatistics_Title'[0m,
        [32m'content_body'[0m: [32m'HealthStatistics_ContentBody'[0m
    [1m}[0m,
    [32m'live-healthy-articles'[0m: [1m{[0m
        [32m'uuid'[0m: [32m'id'[0m,
        [32m'content_title'[0m: [32m'LiveHealthyArticle_Title'[0m,
        [32m'content_body'[0m: [32m'LiveHealthyArticle_Content_Body'[0m
    [1m}[0m,
    [3

In [13]:
# Name of the column holding the article's HTML body for this category
content_body = metadata[content_category]["content_body"]
content_body

[32m'LiveHealthyArticle_Content_Body'[0m

### Flag all articles with no content

In [14]:
# Mark articles with no content `to_remove`: the body is missing or contains
# no <div>, <p> or <h2> tag.
# The alternation must be a group; a character class such as `[div|p|h2]`
# matches any single one of those characters, so tags like <img> or <hr>
# would be treated as content. `\b` keeps <p> from matching <pre>, <param>, ...
content_tag_pattern = re.compile(r"<(?:div|p|h2)\b[^>]*>")
df["to_remove"] = df[content_body].apply(
    lambda x: not (pd.notna(x) and content_tag_pattern.search(str(x)))
)
print(df.shape)

(1155, 26)


### Sample 5 articles

In [15]:
# Fixed seed keeps the sample reproducible; the index is reset to 0..4
tmp = df.sample(5, random_state=42).reset_index(drop=True)
tmp

Unnamed: 0,id,Content.Name,LiveHealthyArticle_Title,LiveHealthyArticle_ArticleCateName,LiveHealthyArticle_CoverImgUrl,LiveHealthyArticle_FullUrl,LiveHealthyArticle_FullUrl2,LiveHealthyArticle_FriendlyUrl,LiveHealthyArticle_CategoryDes,LiveHealthyArticle_Content_Body,...,LiveHealthyArticle_Lastmonthview,LiveHealthyArticle_LastTwoMonthsView,Page Views,Engagement Rate,Bounce Rate,Exit Rate,Scroll %,% of Total Views,Cumulative % of Total Views,to_remove
0,1445429,Know Your Alcohol Limit: Don’t Be a Party Pooper!,Know Your Alcohol Limit: Don’t Be a Party Pooper!,"Sexual Health and Relationships,",https://ch-api.healthhub.sg/api/public/content...,https://www.healthhub.sg/live-healthy/dont_be_...,www.healthhub.sg/live-healthy/dont_be_a_party_...,dont_be_a_party_pooper,Everyone enjoys a night of partying. But what ...,"<div class=""ExternalClassFF709E64E1BD479883D08...",...,62.0,95.0,224,0.970954,0.029046,0.166023,0.266741,0.000189,0.986617,False
1,1442458,Help Your Child Untangle From The Web,Help Your Child Untangle From The Web,"Mind and Balance,",https://ch-api.healthhub.sg/api/public/content...,https://www.healthhub.sg/live-healthy/help-you...,www.healthhub.sg/live-healthy/help-your-child-...,help-your-child-untangle-from-the-web,Does your child spend long hours in front of t...,"<div class=""ExternalClassF5D63B3B546A42419922B...",...,171.0,291.0,1140,0.807725,0.192275,0.318612,0.3375,0.000963,0.677177,False
2,1444585,Makan Matters: What‘s a Balanced Diet?,Makan Matters: What‘s a Balanced Diet?,"Food and Nutrition,",https://ch-api.healthhub.sg/api/public/content...,https://www.healthhub.sg/live-healthy/makan-ma...,www.healthhub.sg/live-healthy/makan-matters-wh...,makan-matters-whats-a-healthy-diet,Trying to eat better? Find out what makes a he...,"<div class=""ExternalClassB17BB24F34014C7AAEC3F...",...,0.0,0.0,2434,0.7025,0.2975,0.701918,0.368735,0.002057,0.462499,False
3,1443659,Preparing for Pregnancy: 3 Things to Do Now Th...,Preparing for Pregnancy: 3 Things to Do Now Th...,"Body Care,",https://ch-api.healthhub.sg/api/public/content...,https://www.healthhub.sg/live-healthy/3-things...,www.healthhub.sg/live-healthy/3-things-to-do-n...,3-things-to-do-now-that-youre-pregnant,You've stopped eating raw food and you've also...,"<div class=""ExternalClassA116BA2F83BB43CE9D25E...",...,30.0,41.0,1027,0.71013,0.28987,0.706122,0.346641,0.000868,0.707952,False
4,1444177,Time to Exercise: Aeroplane (in Four-Point Kne...,Time to Exercise: Aeroplane (in Four-Point Kne...,"Exercise & Fitness,",https://ch-api.healthhub.sg/api/public/content...,https://www.healthhub.sg/live-healthy/time-to-...,www.healthhub.sg/live-healthy/time-to-exercise...,time-to-exercise-aeroplane-in-four-point-kneeling,"Hey there mum-to-be, let's do some exercise! G...","<div class=""ExternalClass5A08DDBEC9704AB088788...",...,23.0,23.0,277,0.889251,0.110749,0.298611,0.268051,0.000234,0.961391,False


In [16]:
# Name of the column holding the article title for this category
content_title = metadata[content_category]["content_title"]
content_title

[32m'LiveHealthyArticle_Title'[0m

In [17]:
# Result columns start as None, so articles skipped below keep empty values
tmp["related_sections"] = None
tmp["extracted_content_body"] = None
tmp["extracted_links"] = None
tmp["extracted_headers"] = None

for index, row in tmp.iterrows():
    # Skip extraction for those articles flagged for removal
    if row["to_remove"]:
        continue

    # Print the title of the article being processed
    title = row[content_title]
    print(title)

    # Get the HTML content
    html_content = row[content_body]

    # Extract text from HTML using the HTMLExtractor Class.
    # Links and headers are collected before the main text so that they are
    # read from the parsed HTML as loaded, independent of any changes made to
    # the tree while the text is extracted
    extractor = HTMLExtractor(html_content)
    related_sections = extractor.extract_related_sections()
    extracted_links = extractor.extract_links()
    extracted_headers = extractor.extract_headers()
    extracted_content_body = extractor.extract_text()

    # Store extracted data into the dataframe
    tmp.at[index, "related_sections"] = related_sections
    tmp.at[index, "extracted_content_body"] = extracted_content_body
    tmp.at[index, "extracted_links"] = extracted_links
    tmp.at[index, "extracted_headers"] = extracted_headers

    # If `extracted_content_body` is empty, we update flag to remove
    if extracted_content_body == "":
        tmp.at[index, "to_remove"] = True

Know Your Alcohol Limit: Don’t Be a Party Pooper!
Help Your Child Untangle From The Web
Makan Matters: What‘s a Balanced Diet?
Preparing for Pregnancy: 3 Things to Do Now That You're Pregnant
Time to Exercise: Aeroplane (in Four-Point Kneeling)


In [18]:
# Position of the sampled article inspected in the following cells
index = 1

tmp.iloc[index]["LiveHealthyArticle_FullUrl"]

[32m'https://www.healthhub.sg/live-healthy/help-your-child-untangle-from-the-web'[0m

In [19]:
# Related-section titles extracted for the selected article
tmp.iloc[index]["related_sections"]


[1m[[0m
    [32m'Screen Time'[0m,
    [32m'Disconnect To Reconnect Why A Social Media Detox Might Be Good For You'[0m,
    [32m'9 Health Hazards of Electronic Devices for Kids'[0m,
    [32m'Dealing with Cyber-Bullies'[0m,
    [32m'It Is Best Your Child Hears It From You'[0m,
    [32m'Dealing With Cyber Bullies'[0m,
    [32m'Parent Hub'[0m,
    [32m'Happy and Lasting Relationships with Children'[0m,
    [32m'Stuck in the Web'[0m
[1m][0m

In [20]:
# Main text extracted for the selected article
tmp.iloc[index]["extracted_content_body"]

[32m'Singapore teens are among the most Internet-savvy in the world. Statistics from the Infocomm Development Authority of Singapore show that 99% of teenagers used the Internet in 2014. Their favourite mobile activities are social networking on platforms like Instagram, instant messaging via apps like Snapchat, and sending and receiving emails.\nOnline activities are enjoyable, so it is easy to spend long hours on the computer. It is normal for your child to want to spend more time on activities that fascinate them, and these interests are often good outlets for learning, creativity and self-expression.\nBut when any activity becomes the major focus of their life to the point where it becomes harmful physically, mentally or socially your child may have a cyber addiction problem. A 2010 study[0m[32m[[0m[32m1[0m[32m][0m[32m led by the National Institute of Education and the Media Development Authority found that Singaporean youth spend more time than American adolescents on vid

In [21]:
# (title, URL) pairs extracted from the selected article's anchor tags
tmp.iloc[index]["extracted_links"]


[1m[[0m
    [1m([0m
        [32m'Screen Time'[0m,
        [32m'http://www.babybonus.msf.gov.sg/parentingresources/web/Young-Children/YoungChildrenPlay_and_Learning/Screen_Time/Young_Children_Screen_Time'[0m
    [1m)[0m,
    [1m([0m[32m'these ideas'[0m, [32m'https://www.healthhub.sg/live-healthy/ideas-for-an-active-weekend'[0m[1m)[0m,
    [1m([0m
        [32m'Disconnect To Reconnect — Why A Social Media Detox Might Be Good For You'[0m,
        [32m'https://www.healthhub.sg/live-healthy/disconnect-to-reconnect-why-a-social-media-detox-might-be-good-for-you'[0m
    [1m)[0m,
    [1m([0m
        [32m'9 Health Hazards of Electronic Devices for Kids'[0m,
        [32m'https://www.healthhub.sg/live-healthy/9-health-hazards-of-electronic-devices-for-kids'[0m
    [1m)[0m,
    [1m([0m
        [32m'Dealing with Cyber-Bullies'[0m,
        [32m'https://www.healthhub.sg/live-healthy/dealing_with_cyber-bullies'[0m
    [1m)[0m,
    [1m([0m
        [32m'It Is

In [22]:
# (text, tag name) pairs extracted from the selected article's headers
tmp.iloc[index]["extracted_headers"]


[1m[[0m
    [1m([0m[32m'1. Practise What You Preach'[0m, [32m'h2'[0m[1m)[0m,
    [1m([0m[32m'2. Have Control'[0m, [32m'h2'[0m[1m)[0m,
    [1m([0m[32m'3. Be Understanding'[0m, [32m'h2'[0m[1m)[0m,
    [1m([0m[32m'4. Be Alert to Danger'[0m, [32m'h2'[0m[1m)[0m,
    [1m([0m[32m'5. Set Realistic Rules'[0m, [32m'h2'[0m[1m)[0m
[1m][0m