# scrape ST links and articles by year

In [1]:
# Scraping is done one year at a time. The final workbook and the incremental
# checkpoint for this run share a single filename stem.
_RUN_STEM = "Output/ST/st_articles_2024_Jan_to_Dec"
OUTPUT_XLSX = _RUN_STEM + ".xlsx"
CHECKPOINT_CSV = _RUN_STEM + "_checkpoint.csv"

In [2]:
# DATE PARAMETERS
# Zero-padded month numbers ("01" .. "12"), substituted for {m} below.
MONTHS = [f"{month:02d}" for month in range(1, 13)]
# Monthly sitemap feed URL; {m} is filled with one entry of MONTHS.
SITEMAP_TMPL = "https://www.straitstimes.com/sitemap/2024/{m}/feeds.xml"


In [3]:
# WEBSITES TO SCRAPE FROM
# label: how it will appear in the new 'website' column
# base_url: the listing page (to exclude if it ever shows up)
# match_substrings: if any of these strings is in the URL, we treat it as belonging to this website
#
# Every base_url uses the same canonical "https://www.straitstimes.com" origin
# as the sitemap entries, so the listing-page exclusion compares like with like.
WEBSITE_SOURCES = [
    {
        "label": "housing",
        "base_url": "https://www.straitstimes.com/singapore/housing",
        "match_substrings": ["/singapore/housing/"],
    },
    {
        "label": "tag_hdb",
        "base_url": "https://www.straitstimes.com/tags/hdb",
        "match_substrings": ["/hdb"],
    },
    {
        "label": "tag_public_housing",
        "base_url": "https://www.straitstimes.com/tags/public-housing",
        "match_substrings": ["/public-housing"],
    },
    {
        "label": "tag_singapore_property",
        "base_url": "https://www.straitstimes.com/tags/singapore-property",
        "match_substrings": ["/singapore-property"],
    },
    {
        "label": "tag_residential_property",
        # Was "http://straitstimes.com/..." (no TLS, no "www."), which never
        # equals the https://www form and so was never excluded.
        "base_url": "https://www.straitstimes.com/tags/residential-property",
        "match_substrings": ["/residential-property"],
    },
    {
        "label": "tag_private_property",
        "base_url": "https://www.straitstimes.com/tags/private-property",
        "match_substrings": ["/private-property"],
    },
]

In [4]:
# libraries
import warnings
warnings.filterwarnings("ignore")

from __future__ import annotations

import os
import csv
import re
import time
from datetime import date

import pandas as pd
import pytz # timezone handling
import requests
from bs4 import BeautifulSoup
from dateutil import parser as dtparser # flexibly parse date/time strings into datetime objects
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


In [5]:
# Timeout (seconds) passed to requests.get when a sitemap is fetched.
REQUEST_TIMEOUT = 30
SLEEP_BETWEEN_ARTICLES = 0.3  # politeness delay (seconds) between article hits
# Phrases at which extracted article text is truncated (see
# cut_at_stop_phrases). Empty by default, i.e. nothing is cut.
STOP_PHRASES = [
    # e.g. "Join ST's Telegram channel",
    #      "Get unlimited access",
]

# URL to open for manual login (home page or dedicated login page)
LOGIN_URL = "https://www.straitstimes.com/"  # you can change to a specific login URL if you prefer

# =================================

# Singapore timezone; parse_article reports publish timestamps in it.
TZ_SGT = pytz.timezone("Asia/Singapore")

# Browser-like User-Agent header sent with the sitemap requests.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0 Safari/537.36"
    )
}

# The "Published ..." and "Updated ..." header lines share the same tail:
# "<Month> <day>, <year>" with an optional ", <h>:<mm> AM|PM" time.
_DATELINE_TAIL = (
    r"\s+[A-Za-z]{3,9}\s+\d{1,2},\s+\d{4}"
    r"(?:,\s+\d{1,2}:\d{2}\s*(?:AM|PM))?\s*$"
)
# Whole-line matchers (case-insensitive) for the two header variants.
PUBLISHED_RE = re.compile(r"^Published" + _DATELINE_TAIL, re.IGNORECASE)
UPDATED_RE = re.compile(r"^Updated" + _DATELINE_TAIL, re.IGNORECASE)


# ========== SESSION FOR SITEMAPS (REQUESTS) ==========

def get_sitemap(url: str) -> str:
    """
    Download a sitemap with plain ``requests`` (no login) and return its text.

    Raises ``requests.HTTPError`` for non-2xx responses. When the response
    declares no encoding, or ISO-8859-1, the body is decoded as UTF-8 instead.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    declared = (response.encoding or "iso-8859-1").lower()
    if declared == "iso-8859-1":
        response.encoding = "utf-8"
    return response.text


# ========== BROWSER FOR LOGGED-IN ARTICLE PAGES (SELENIUM) ==========

def create_logged_in_driver() -> webdriver.Chrome:
    """
    Open a real browser window, let the user log in manually,
    and wait for them to press Enter before scraping.

    Returns the live Chrome driver; the caller is responsible for calling
    ``quit()`` on it. If opening the login page or waiting at the prompt fails
    (including Ctrl-C / EOF at the prompt), the browser is closed here before
    the exception propagates, because the caller never receives a driver to
    clean up.
    """
    options = Options()
    # If you want headless, uncomment:
    # options.add_argument("--headless=new")

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(LOGIN_URL)
        print(
            "A browser window has opened.\n"
            "1) Please log in to The Straits Times with your subscription in that window.\n"
            "2) Once you see that you're logged in (e.g. access to full articles),\n"
            "   come back to this terminal and press Enter to start scraping."
        )
        input("After you have successfully logged in, press Enter here to continue... ")
    except BaseException:
        # BaseException on purpose: KeyboardInterrupt at the prompt must not
        # leave an orphaned browser process behind.
        driver.quit()
        raise
    return driver


# ========== URL CLASSIFICATION ==========

def _canonical_url(url: str) -> str:
    """Strip scheme, a leading ``www.`` and trailing slashes for comparison."""
    trimmed = url.strip().rstrip("/")
    return re.sub(r"^https?://(?:www\.)?", "", trimmed, flags=re.IGNORECASE)


def classify_website(url: str, sources=None):
    """
    Return a label (from WEBSITE_SOURCES) if this URL matches any
    configured website patterns; otherwise return None.

    Parameters:
        url: absolute article URL, e.g. a sitemap ``<loc>`` value.
        sources: optional list of source dicts with the keys ``label``,
            ``base_url`` and ``match_substrings``; defaults to the
            module-level WEBSITE_SOURCES.

    A URL that is itself one of the configured listing pages (``base_url``)
    is never classified. That check ignores http/https, a leading ``www.``
    and trailing slashes, and runs against every source before any pattern
    is tried, so the result does not depend on the order of the sources.
    Otherwise the label of the first source with a matching substring wins.
    """
    if sources is None:
        sources = WEBSITE_SOURCES

    # Skip the pure listing pages themselves, if they ever appear
    canonical = _canonical_url(url)
    if any(canonical == _canonical_url(src["base_url"]) for src in sources):
        return None

    for src in sources:
        if any(pat in url for pat in src["match_substrings"]):
            return src["label"]
    return None


# ========== TEXT / CONTENT HELPERS ==========

def clean_text(txt: str) -> str:
    """
    Normalise whitespace in extracted text.

    Non-breaking spaces become plain spaces, runs of spaces/tabs collapse to
    one space, three or more consecutive newlines collapse to two, and the
    result is stripped at both ends.
    """
    single_spaced = re.sub(r"[ \t]+", " ", txt.replace("\u00a0", " "))
    return re.sub(r"\n{3,}", "\n\n", single_spaced).strip()


def cut_at_stop_phrases(text: str, stops) -> str:
    """
    Truncate *text* at the earliest occurrence of any stop phrase.

    Parameters:
        text: the article text.
        stops: iterable of phrases, matched literally and case-insensitively;
            ``None`` or an empty collection disables cutting. Empty phrases
            are ignored, since they would otherwise match at position 0 and
            erase the whole text.

    Returns the text before the first match with trailing whitespace removed,
    or *text* unchanged when nothing matches.
    """
    if not stops:
        return text

    # Search the original string rather than text.lower(): lower-casing can
    # change the string's length (e.g. "İ"), which would shift the cut index.
    hits = []
    for phrase in stops:
        if not phrase:
            continue
        found = re.search(re.escape(phrase), text, flags=re.IGNORECASE)
        if found:
            hits.append(found.start())

    if not hits:
        return text
    return text[: min(hits)].rstrip()


def is_promo_line(t: str) -> bool:
    """Return True if *t* looks like the newsletter sign-up promo line."""
    lowered = t.lower()
    if "newsletters delivered to your inbox" in lowered:
        return True
    return "sign up now" in lowered and "newsletter" in lowered


def is_photo_credit(t: str) -> bool:
    """Return True if *t*, ignoring surrounding whitespace and case, starts with "PHOTO:" or "ST PHOTO:"."""
    return t.strip().upper().startswith(("PHOTO:", "ST PHOTO:"))


def is_byline(t: str) -> bool:
    """
    Heuristically decide whether *t* is an author byline.

    True when the line starts with "by " (any case), or when it consists of
    one to four words that are each either Capitalised (optionally
    hyphenated, e.g. "Jean-Luc") or ALL-CAPS with at least two letters.
    """
    if t.lower().startswith("by "):
        return True

    words = t.strip().split()
    if not 1 <= len(words) <= 4:
        return False

    return all(
        re.match(r"^[A-Z][a-z]+(-[A-Z][a-z]+)?$", word) or re.match(r"^[A-Z]{2,}$", word)
        for word in words
    )


def _body_paragraphs(node):
    """Return the non-empty <p> descendants of *node* that are not inside a link, a photo credit or a promo line."""
    kept = []
    for p in node.find_all("p"):
        # Skip paragraphs that are themselves inside a link, so link text
        # is not counted twice.
        if p.find_parent("a"):
            continue
        txt = p.get_text(" ", strip=True)
        if not txt:
            continue
        if is_photo_credit(txt) or is_promo_line(txt):
            continue
        kept.append(p)
    return kept


def find_content_paragraphs(soup: BeautifulSoup):
    """
    Heuristically locate the paragraphs that comprise the article body.
    Returns a list of elements (usually <p> or <div>).

    NOTE: this mutates *soup*: script/style/header/footer/form/aside and
    image-related elements are removed from the tree before searching.

    Special handling for Straits Times:
    - Prefer content inside .storyline-wrapper / .archival-wrapper; within
      each wrapper the archival blocks are listed before the <p> elements
    - Ignore paragraphs nested inside <a> (to avoid duplicate link text)
    - Ignore photo credits and promo lines

    Fallback order when fewer than three Straits Times nodes are found:
    generic article-body selectors (first candidate with at least three
    paragraphs, else the first candidate with any), then every <p> on the
    page.
    """

    # 1. Remove obvious non-content elements, including images
    for sel in [
        "script",
        "style",
        "noscript",
        "header",
        "footer",
        "form",
        "aside",
        "figure",
        "picture",
        "img",
        "figcaption",
    ]:
        for tag in soup.select(sel):
            tag.decompose()

    # 2. Straits Times specific containers: storyline + archival-wrapper
    st_containers = soup.select("div.storyline-wrapper.default, div.storyline-wrapper")
    st_paragraph_nodes = []

    for cont in st_containers:
        # (a) Chuan Park style: text inside <div class="archival-wrapper"><span>...</span></div>
        for block in cont.select("div.archival-wrapper"):
            if block.get_text(" ", strip=True):
                st_paragraph_nodes.append(block)

        # (b) Usual <p> paragraphs for newer templates
        st_paragraph_nodes.extend(_body_paragraphs(cont))

    # If we found a reasonable number of paragraphs in storyline-wrapper, use them
    if len(st_paragraph_nodes) >= 3:
        return st_paragraph_nodes

    # 3. Generic fallback for other sites / older templates
    selectors = [
        '[itemprop="articleBody"]',
        '[property="articleBody"]',
        "article",
        ".article-body",
        ".article__content",
        ".content-body",
        ".rich-text",
        ".field--name-body",
        ".c-article-content",
        ".article-content",
        ".story-content",
        ".field-item",
    ]
    candidates = []
    for sel in selectors:
        candidates.extend(soup.select(sel))

    if not candidates:
        main = soup.find("main")
        candidates = [main] if main else [soup]

    best_ps = []
    for node in candidates:
        ps = _body_paragraphs(node)
        if len(ps) >= 3:
            # First candidate with a plausible body wins outright.
            best_ps = ps
            break
        if ps and not best_ps:
            # Remember the first non-empty candidate in case none reaches three.
            best_ps = ps

    # Last resort: all <p> on the page, with light filtering
    if not best_ps:
        best_ps = _body_paragraphs(soup)

    return best_ps


def extract_article_content_with_lead(soup: BeautifulSoup) -> str:
    """
    Build the article text from the paragraphs found by find_content_paragraphs.

    If a 'Published ...' header line is present, the result is: at most one
    'lead line' taken from before that header (searching from just after the
    newsletter promo if one was seen, otherwise from up to six paragraphs
    back, and skipping photo credits, bylines and promo lines), followed by
    everything after the header and an optional 'Updated ...' line.
    Without a 'Published ...' header, or if that assembly comes out empty,
    all paragraphs are used. The text is cleaned and cut at STOP_PHRASES.
    Returns "" when no paragraphs are found.
    """
    nodes = find_content_paragraphs(soup)
    if not nodes:
        return ""

    # Normalized paragraph texts
    texts = [clean_text(node.get_text(" ", strip=True)) for node in nodes]

    def assemble(chunks):
        return cut_at_stop_phrases(clean_text("\n\n".join(chunks)), STOP_PHRASES)

    # Index of the first 'Published ...' header, if any
    published_idx = next(
        (
            i
            for i, line in enumerate(texts)
            if PUBLISHED_RE.match(line) or line.startswith("Published ")
        ),
        None,
    )
    if published_idx is None:
        return assemble(texts)

    # First promo line at or before the header decides where the lead scan starts
    promo_idx = next(
        (i for i, line in enumerate(texts[: published_idx + 1]) if is_promo_line(line)),
        None,
    )
    scan_start = max(published_idx - 6, 0) if promo_idx is None else promo_idx + 1

    def is_lead(line):
        if not line:
            return False
        if is_photo_credit(line) or is_byline(line) or is_promo_line(line):
            return False
        # A substantive sentence: long enough, or ends with . ! ?
        return len(line) >= 40 or bool(re.search(r"[.!?]$", line))

    lead_line = next(
        (texts[j] for j in range(scan_start, published_idx) if is_lead(texts[j])),
        "",
    )

    # Body starts after 'Published ...' and an optional 'Updated ...'
    body_start = published_idx + 1
    if body_start < len(texts):
        following = texts[body_start]
        if UPDATED_RE.match(following) or following.startswith("Updated "):
            body_start += 1

    pieces = ([lead_line] if lead_line else []) + texts[body_start:]
    content = assemble(pieces)

    # Fall back to the whole body if nothing survived
    return content if content else assemble(texts)


# ========== ARTICLE PARSING (METADATA + CONTENT, USING DRIVER) ==========

def parse_article(url: str, driver: webdriver.Chrome):
    """
    Load *url* once in the logged-in Selenium browser and extract the article.

    Returns a dict with the keys "title", "url", "published_date"
    (ISO-8601 string in Singapore time), "content" and "char_count",
    or None when no title or no publish date could be found.
    """
    driver.get(url)
    # If the page is JS-heavy, you can wait a bit or add explicit waits here
    time.sleep(1.5)  # adjust if needed

    soup = BeautifulSoup(driver.page_source, "lxml")

    # ---- Title: og:title meta first, then the first <h1> ----
    og_title = soup.find("meta", {"property": "og:title"})
    title = og_title.get("content") if og_title is not None else None
    if not title:
        heading = soup.find("h1")
        title = heading.get_text(" ", strip=True) if heading else None

    # ---- Published time: known meta tags, then <time>, then page text ----
    meta_lookups = (
        {"property": "article:published_time"},
        {"name": "article:published_time"},
        {"itemprop": "datePublished"},
        {"name": "pubdate"},
    )
    pub = None
    for attrs in meta_lookups:
        meta = soup.find("meta", attrs=attrs)
        raw = meta.get("content") if meta else None
        if raw:
            pub = raw.strip()
            break

    if not pub:
        time_tag = soup.find("time")
        stamp = (time_tag.get("datetime") or time_tag.get("content")) if time_tag else None
        if stamp:
            pub = stamp.strip()

    if not pub:
        found = re.search(
            r"Published\s+[A-Za-z]{3,9}\s+\d{1,2},\s+\d{4}"
            r"(?:,\s+\d{1,2}:\d{2}\s*(?:AM|PM))?",
            soup.get_text("\n", strip=True),
        )
        pub = found.group(0).replace("Published", "").strip() if found else None

    if not title or not pub:
        return None

    # ---- Parse datetime and convert to SGT ----
    dt = dtparser.parse(pub)
    if dt.tzinfo is None:
        # Naive timestamps are taken to be Singapore local time.
        dt = TZ_SGT.localize(dt)
    else:
        dt = dt.astimezone(TZ_SGT)

    # ---- Extract content ----
    content = extract_article_content_with_lead(soup)

    return {
        "title": title,
        "url": url,
        "published_date": dt.isoformat(),  # renamed from published_sgt
        "content": content,
        "char_count": len(content),
    }


# ========== CHECKPOINT HELPER ==========

def append_checkpoint_row(row_dict: dict, checkpoint_path: str):
    """
    Append a single article row to the checkpoint CSV.

    Parameters:
        row_dict: article fields; keys outside the checkpoint columns are
            dropped and missing columns are written as empty strings.
        checkpoint_path: path of the CSV file to append to.

    The parent directory is created if needed, and the header row is written
    whenever the file is missing or still empty (e.g. left behind by an
    interrupted run), so the checkpoint is always readable by column name.
    """
    fieldnames = ["website", "title", "url", "published_date", "content", "char_count"]

    parent_dir = os.path.dirname(checkpoint_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    needs_header = (
        not os.path.exists(checkpoint_path) or os.path.getsize(checkpoint_path) == 0
    )

    with open(checkpoint_path, "a", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow({k: row_dict.get(k, "") for k in fieldnames})


# ========== MAIN WORKFLOW (SITEMAPS → CHECKPOINT → EXCEL) ==========

def main():
    """
    Run the whole scrape: manual login, sitemap URL collection, per-article
    parsing with a resumable CSV checkpoint, then export to Excel.

    The browser is always closed on exit, including on errors.
    """
    # 0. Open browser and let user log in manually
    driver = create_logged_in_driver()

    try:
        # 1 + 2. Walk the monthly sitemaps and keep each matching URL once,
        #        with the first website label it was given.
        label_by_url = {}
        for month in MONTHS:
            print(f"Fetching sitemap for month {month}...")
            sitemap = BeautifulSoup(get_sitemap(SITEMAP_TMPL.format(m=month)), "xml")
            for loc in sitemap.find_all("loc"):
                link = loc.get_text(strip=True)
                label = classify_website(link)
                if label:
                    label_by_url.setdefault(link, label)

        unique_records = list(label_by_url.items())
        print(f"Found {len(unique_records)} URLs for configured websites.")

        # 3. Load checkpoint (if exists) to know which URLs are already done
        done_urls = set()
        if os.path.exists(CHECKPOINT_CSV):
            print(f"Found existing checkpoint: {CHECKPOINT_CSV}")
            try:
                df_ckpt = pd.read_csv(CHECKPOINT_CSV, encoding="utf-8-sig")
                if "url" in df_ckpt.columns:
                    done_urls = set(df_ckpt["url"].dropna().astype(str))
                    print(f"{len(done_urls)} URLs already scraped (from checkpoint).")
            except Exception as e:
                print(f"WARNING: failed to read checkpoint ({e}). Proceeding as if none.")

        total = len(unique_records)

        # 4. For each URL, parse metadata + content using logged-in driver
        for position, (link, label) in enumerate(unique_records, 1):
            prefix = f"[{position}/{total}]"
            suffix = f"[{label}] - {link}"

            if link in done_urls:
                print(f"{prefix} SKIP (already in checkpoint) {suffix}")
                continue

            try:
                art = parse_article(link, driver=driver)
                if not art:
                    status = "SKIP (missing title/pubdate)"
                else:
                    art["website"] = label  # add website label

                    # Append immediately to checkpoint so we don't lose progress
                    append_checkpoint_row(art, CHECKPOINT_CSV)
                    done_urls.add(link)

                    status = f"OK (chars={art['char_count']})"
            except Exception as e:
                # One bad article must not abort the run; report and move on.
                status = f"ERR ({e})"
            print(f"{prefix} {status} {suffix}")
            time.sleep(SLEEP_BETWEEN_ARTICLES)

        # 5. Build final DataFrame from checkpoint, drop duplicates, and write Excel
        if not os.path.exists(CHECKPOINT_CSV):
            print("No checkpoint file found; nothing to write.")
        else:
            df_out = pd.read_csv(CHECKPOINT_CSV, encoding="utf-8-sig")

            # Drop duplicates based on title + content
            rows_before = len(df_out)
            df_out = df_out.drop_duplicates(subset=["title", "content"], keep="first")
            removed = rows_before - len(df_out)
            print(f"Removed {removed} duplicate rows based on (title, content).")

            with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer:
                df_out.to_excel(writer, sheet_name="articles", index=False)

            print(f"Wrote {len(df_out)} rows to {OUTPUT_XLSX}")
    finally:
        # Always close the browser
        driver.quit()


if __name__ == "__main__":
    main()


A browser window has opened.
1) Please log in to The Straits Times with your subscription in that window.
2) Once you see that you're logged in (e.g. access to full articles),
   come back to this terminal and press Enter to start scraping.


After you have successfully logged in, press Enter here to continue...  


Fetching sitemap for month 01...
Fetching sitemap for month 02...
Fetching sitemap for month 03...
Fetching sitemap for month 04...
Fetching sitemap for month 05...
Fetching sitemap for month 06...
Fetching sitemap for month 07...
Fetching sitemap for month 08...
Fetching sitemap for month 09...
Fetching sitemap for month 10...
Fetching sitemap for month 11...
Fetching sitemap for month 12...
Found 147 URLs for configured websites.
[1/147] OK (chars=3750) [housing] - https://www.straitstimes.com/singapore/housing/tengah-residents-welcome-new-bus-stops-hope-for-more-bus-services-to-be-added
[2/147] OK (chars=4856) [housing] - https://www.straitstimes.com/singapore/housing/first-executive-condo-launch-of-2024-sells-53-of-units
[3/147] OK (chars=7454) [housing] - https://www.straitstimes.com/singapore/housing/2-former-schools-next-to-commonwealth-mrt-station-to-make-way-for-housing
[4/147] OK (chars=4036) [housing] - https://www.straitstimes.com/singapore/housing/new-homes-to-be-built-wit

# merge all scraped data files

In [103]:
import pandas as pd

# Load the per-year Excel exports, one DataFrame per year.
# The 2025 file only covers January to October.
df2020 = pd.read_excel("Output/ST/st_articles_2020_Jan_to_Dec.xlsx")
df2021 = pd.read_excel("Output/ST/st_articles_2021_Jan_to_Dec.xlsx")
df2022 = pd.read_excel("Output/ST/st_articles_2022_Jan_to_Dec.xlsx")
df2023 = pd.read_excel("Output/ST/st_articles_2023_Jan_to_Dec.xlsx")
df2024 = pd.read_excel("Output/ST/st_articles_2024_Jan_to_Dec.xlsx")
df2025 = pd.read_excel("Output/ST/st_articles_2025_Jan_to_Oct.xlsx")

In [2]:
# Number of rows in the 2020 export
len(df2020)

160

In [3]:
# Number of rows in the 2021 export
len(df2021)

210

In [4]:
# Number of rows in the 2022 export
len(df2022)

234

In [5]:
# Number of rows in the 2023 export
len(df2023)

142

In [6]:
# Number of rows in the 2024 export
len(df2024)

141

In [7]:
# Number of rows in the 2025 (Jan to Oct) export
len(df2025)

110

In [104]:
# Stack all yearly frames into one, in chronological order, with a fresh 0..n-1 index
df = pd.concat([df2020, df2021, df2022, df2023, df2024, df2025], ignore_index=True)

In [105]:
# Rename char_count to content_chars, then keep only the relevant columns
# (every other column is dropped).
relevant_columns = ['title', 'url', 'published_date', 'content', 'content_chars']
df = df.rename(columns={'char_count': 'content_chars'})[relevant_columns]

In [106]:
# Drop rows that are identical in every column (first occurrence kept) and
# renumber the index. Rows that differ in any column are NOT removed here.
df = df.drop_duplicates(keep='first').reset_index(drop=True)


In [107]:
# Check whether any title appears more than once
df['title'].duplicated().any()

True

In [108]:
# Check whether any URL appears more than once
df['url'].duplicated().any()

False

In [109]:
# Number of rows whose title is shared with another row
# (keep=False flags every member of a duplicate group, not just the repeats)
len(df[df.duplicated(subset=['title'], keep=False)])

11

In [110]:
# Show every row whose title is shared with another row
df[df.duplicated(subset=['title'], keep=False)]


Unnamed: 0,title,url,published_date,content,content_chars
183,HDB upgraders help boost new condo prices in suburbs,https://www.straitstimes.com/business/property/hdb-upgraders-help-boost-new-condo-prices-in-suburbs,2021-02-14T05:00:00+08:00,"The price gap between new private homes and older ""pre-loved"" ones in the suburbs has widened further on the back of a buying spree from HDB upgraders armed with cheap mortgages.\n\nNew homes typically command a premium over resale units, due in part to ever-increasing prices for development land, but the overall differential widened last year to about 37 per cent from 32.4 per cent in 2019.\n\nDrill down into the data from OrangeTee & Tie Research and the differences in suburban areas were even starker, with the gap widening the most since 1995, noted Ms Christine Sun, the firm's senior vice-president of research and analytics.\n\nThe price differential between new and resale homes in the suburbs - known as outside central region - where Housing Board upgraders tend to buy, widened to 47.3 per cent, up from an already significant 38.7 per cent in 2019 and 34.2 per cent in 2018.\n\nThis came as suburban new home values defied the pandemic to jump 7.9 per cent last year, with median prices of new launches reaching $1,548 per sq ft, compared with $1,051 psf for resale units, said ERA Realty's head of research and consultancy, Mr Nicholas Mak.\n\nResale prices in these areas rose only 1.6 per cent.\n\nViewing restrictions amid the pandemic last year also dampened the resale market. 
During the circuit breaker from April to June, for instance, people were prevented from viewing homes in person.\n\nMoreover, many buyers prefer to view a resale unit to check its condition, while new launches are bought on plan because they are not yet completed, Ms Sun said.\n\nRISING HDB RESALE PRICES SPUR DEMAND\n\nDemand from upgraders could continue this year in the light of HDB resale flat prices leaping 5 per cent last year - the steepest growth since 2012, when values jumped 6.5 per cent, analysts said.\n\nThey noted that a slew of HDB policy changes rolled out in the past two years are taking effect, including allowing buyers greater flexibility in using Central Provident Fund (CPF) money to buy older flats, enhancing the CPF Housing Grant, raising income ceilings and allowing larger home loans for eligible buyers.\n\nA boost also came from stimulus measures during the lockdown such as the Jobs Support Scheme and legislating temporary measures on moratoriums or waivers on commercial rents, home mortgages and interest payments.\n\nHDB resales hit an eight-year high last year, up 4.4 per cent to 24,748 units sold, from 23,714 units in 2019.\n\nMs Alice Tan, head of consultancy at Knight Frank Singapore, said many of these buyers were able to enter the private market as they had recently completed their five-year minimum occupation period.\n\nMr Wong Xian Yang, associate director of research for Singapore and South-east Asia at Cushman & Wakefield, said the initial loan payments for buyers for new launches under construction are much lower due to the progressive payment scheme. This is where home owners pay for a property according to the stage of construction it is at.\n\nEngineer Emilia Wong sold her five-room, 110 sq m flat in Taman Jurong for $570,000 after it reached the five-year point in August 2019. 
She upgraded to a $1.4 million 95 sq m three-bedder at Le Quest condo last July, a move delayed by a few months because of the circuit breaker.\n\n""We downsized a little, but the condo has a lot of amenities, including a community mall, and our kids love swimming a lot. So, if we can afford Le Quest, why not?"" said Ms Wong, 36, who is married with three children.\n\nShe and her civil servant husband Md Faizal Salim pay a monthly loan instalment of almost $4,000 for the apartment, after making a 20 per cent down payment with their savings. They bought a new condo unit as they believe the potential for capital appreciation is higher than resale units.\n\n""We wanted to live in the west because our parents live in Jurong and our kids study in schools there. I am working in Tuas and my husband works at Jurong Island,"" she said.\n\nThe couple had considered Whistler Grand and Parc Clematis, but chose Le Quest as it was near completion last year.\n\nHowever, fewer property launches are expected this year.\n\nMost mega-projects have already been launched in the past two years, said Ms Sun. 
In addition, the projects stemming from the spate of aggressive land deals in 2017 and 2018 are nearing the peak and are expected to taper off from next year, she added.\n\nIS THE SITUATION SUSTAINABLE?\n\nAnalysts said the gap in suburban home prices is unsustainable as resales may start to look appealing to cost-conscious buyers.\n\nA case in point, the prices of new launches in the prime or core central region began to drop last year after the gap widened to as much as 47 per cent in 2018, said Ms Tricia Song, head of research (Singapore) for Colliers International.\n\nSo, the same trend may be seen in suburban private property.\n\n""The widening gap indicates resales could represent more value for money on a per square foot basis, so I think more demand will shift to resale homes,"" said Ms Song.\n\nThe resale market could also see more interest if the collective sale market starts to stir as developers are looking to acquire land in the face of dwindling unsold inventory, said Mr Wong.\n\nMeanwhile, the price differential in prime areas has grown narrower, as foreigner buying, particularly in the luxury market, slumped to a 17-year low last year due to global travel restrictions and lockdowns.\n\nThe gap between new and resale prices in prime districts narrowed to 37.7 per cent last year from 45.8 per cent in 2019 and 47.2 per cent in 2018, said Mr Mak.\n\nMedian prices of new prime district homes sold by developers fell 7.6 per cent last year from 2019, faster than the drop in resale prices over the same period, he added.\n\nNevertheless, mainland Chinese remained the top foreign buyers of new non-landed homes in Singapore last year.\n\nSome of these buyers may have diverted their funds to property to combat the devaluation of the Chinese renminbi amid the ongoing economic crisis.\n\nTo many Chinese investors, property in Singapore is highly attractive in terms of the potential for capital appreciation, Ms Sun said.\n\nWhile demand in the overall resale 
market is good, some owners have difficulty selling due to other issues.\n\nPropNex agent Kelvin Tan, who has been marketing a two-bedder at SunGlade in Serangoon since January last year at $1.05 million, said the condo unit's owner has received four offers so far for the 861 sq ft leasehold unit.\n\n""The offers came in at $950,000, $980,000. We also had an offer at $1.02 million in February last year, but the buyer had problems selling his flat, so we couldn't close (the deal),"" he added.\n\n""Just two weeks ago, we got an offer of $1.03 million from someone buying for his own stay,"" Mr Tan said.\n\nBut the condo unit's owner, a self-employed businessman, is now in a bind because his condo tenant wants to renew the lease for one more year.\n\n""So for now, the owner can sell only to investors,"" Mr Tan added.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nWorld\n\nLife",7082
184,HDB upgraders help boost new condo prices in suburbs,https://www.straitstimes.com/business/property/hdb-upgraders-help-boost-new-condo-prices-in-suburbs-0,2021-02-14T05:00:00+08:00,"SINGAPORE - The price gap between new private homes and older ""pre-loved"" ones in the suburbs has widened further on the back of a buying spree from HDB upgraders armed with cheap mortgages.\n\nNew homes typically command a premium over resale units, due in part to ever-increasing prices for development land, but the overall differential widened last year to about 37 per cent from 32.4 per cent in 2019.\n\nDrill down into the data from OrangeTee & Tie Research and the differences in suburban areas were even starker, with the gap widening the most since 1995, noted Ms Christine Sun, the firm's senior vice-president of research and analytics.\n\nThe price differential between new and resale homes in the suburbs - known as outside central region - where Housing Board upgraders tend to buy, widened to 47.3 per cent, up from an already significant 38.7 per cent in 2019 and 34.2 per cent in 2018.\n\nThis came as suburban new home values defied the pandemic to jump 7.9 per cent last year, with median prices of new launches reaching $1,548 per sq ft, compared with $1,051 psf for resale units, said ERA Realty's head of research and consultancy, Mr Nicholas Mak.\n\nResale prices in these areas rose only 1.6 per cent.\n\nViewing restrictions amid the pandemic last year also dampened the resale market. 
During the circuit breaker from April to June, for instance, people were prevented from viewing homes in person.\n\nMoreover, many buyers prefer to view a resale unit to check its condition, while new launches are bought on plan because they are not yet completed, Ms Sun said.\n\nRising HDB resale prices spur demand\n\nDemand from upgraders could continue this year in the light of HDB resale flat prices leaping 5 per cent last year - the steepest growth since 2012, when values jumped 6.5 per cent, analysts said.\n\nThey noted that a slew of HDB policy changes rolled out in the past two years are taking effect, including allowing buyers greater flexibility in using Central Provident Fund (CPF) money to buy older flats, enhancing the CPF Housing Grant, raising income ceilings and allowing larger home loans for eligible buyers.\n\nA boost also came from stimulus measures during the lockdown such as the Jobs Support Scheme and legislating temporary measures on moratoriums or waivers on commercial rents, home mortgages and interest payments.\n\nHDB resales hit an eight-year high last year, up 4.4 per cent to 24,748 units sold, from 23,714 units in 2019.\n\nMs Alice Tan, head of consultancy at Knight Frank Singapore, said many of these buyers were able to enter the private market as they had recently completed their five-year minimum occupation period.\n\nMr Wong Xian Yang, associate director of research for Singapore and South-east Asia at Cushman & Wakefield, said the initial loan payments for buyers for new launches under construction are much lower due to the progressive payment scheme. This is where home owners pay for a property according to the stage of construction it is at.\n\nEngineer Emilia Wong sold her five-room, 110 sq m flat in Taman Jurong for $570,000 after it reached the five-year point in August 2019. 
She upgraded to a $1.4 million 95 sq m three-bedder at Le Quest condo last July, a move delayed by a few months because of the circuit breaker.\n\n""We downsized a little, but the condo has a lot of amenities, including a community mall, and our kids love swimming a lot. So, if we can afford Le Quest, why not?"" said Ms Wong, 36, who is married with three children.\n\nShe and her civil servant husband Md Faizal Salim pay a monthly loan instalment of almost $4,000 for the apartment, after making a 20 per cent down payment with their savings. They bought a new condo unit as they believe the potential for capital appreciation is higher than resale units.\n\n""We wanted to live in the west because our parents live in Jurong and our kids study in schools there. I am working in Tuas and my husband works at Jurong Island,"" she said.\n\nThe couple had considered Whistler Grand and Parc Clematis, but chose Le Quest as it was near completion last year.\n\nHowever, fewer property launches are expected this year.\n\nMost mega-projects have already been launched in the past two years, said Ms Sun. 
In addition, the projects stemming from the spate of aggressive land deals in 2017 and 2018 are nearing the peak and are expected to taper off from next year, she added.\n\nIs the situation sustainable?\n\nAnalysts said the gap in suburban home prices is unsustainable as resales may start to look appealing to cost-conscious buyers.\n\nA case in point, the prices of new launches in the prime or core central region began to drop last year after the gap widened to as much as 47 per cent in 2018, said Ms Tricia Song, head of research (Singapore) for Colliers International.\n\nSo, the same trend may be seen in suburban private property.\n\n""The widening gap indicates resales could represent more value for money on a per square foot basis, so I think more demand will shift to resale homes,"" said Ms Song.\n\nThe resale market could also see more interest if the collective sale market starts to stir as developers are looking to acquire land in the face of dwindling unsold inventory, said Mr Wong.\n\nMeanwhile, the price differential in prime areas has grown narrower, as foreigner buying, particularly in the luxury market, slumped to a 17-year low last year due to global travel restrictions and lockdowns.\n\nThe gap between new and resale prices in prime districts narrowed to 37.7 per cent last year from 45.8 per cent in 2019 and 47.2 per cent in 2018, said Mr Mak.\n\nMedian prices of new prime district homes sold by developers fell 7.6 per cent last year from 2019, faster than the drop in resale prices over the same period, he added.\n\nNevertheless, mainland Chinese remained the top foreign buyers of new non-landed homes in Singapore last year.\n\nSome of these buyers may have diverted their funds to property to combat the devaluation of the Chinese renminbi amid the ongoing economic crisis.\n\nTo many Chinese investors, property in Singapore is highly attractive in terms of the potential for capital appreciation, Ms Sun said.\n\nWhile demand in the overall resale 
market is good, some owners have difficulty selling due to other issues.\n\nPropNex agent Kelvin Tan, who has been marketing a two-bedder at SunGlade in Serangoon since January last year at $1.05 million, said the condo unit's owner has received four offers so far for the 861 sq ft leasehold unit.\n\n""The offers came in at $950,000, $980,000. We also had an offer at $1.02 million in February last year, but the buyer had problems selling his flat, so we couldn't close (the deal),"" he added.\n\n""Just two weeks ago, we got an offer of $1.03 million from someone buying for his own stay,"" Mr Tan said.\n\nBut the condo unit's owner, a self-employed businessman, is now in a bind because his condo tenant wants to renew the lease for one more year.\n\n""So for now, the owner can sell only to investors,"" Mr Tan added.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nWorld\n\nLife",7094
225,"More than 540,000 HDB flat owners to pay less in home insurance premiums",https://www.straitstimes.com/singapore/housing/more-than-540000-hdb-flat-owners-to-pay-less-in-home-insurance-premiums,2021-06-25T05:00:00+08:00,"More than 540,000 Housing Board flat owners will, on average, pay 10 per cent less in premiums for a Central Provident Fund (CPF) home insurance scheme from July 1.\n\nThe CPF Board said yesterday that it would be reducing premiums for the Home Protection Scheme (HPS) owing to ""better-than-expected investment returns and claims experience"".\n\nThe scheme protects CPF members and their families from the risk of losing their HDB flats in the event of death, terminal illness or total permanent disability before their housing loans are paid up.\n\nLast year, $83.8 million was paid out in claims to home owners who were insured under HPS.\n\nThe last time a reduction in premiums for the scheme was made was in 2018.\n\nThe CPF Board said it conducts periodic reviews to ensure that HPS premiums remain affordable, while maintaining the long-term sustainability of the HPS fund.\n\nThe new rates will kick in for members who join the scheme on or after July 1, while existing members will pay the lower prices when they pay their annual premium or adjust their coverage on or after July 1.\n\nFor example, a male member aged 36 with a $200,000 housing loan from the HDB for 30 years will pay a reduced annual premium of $209.40 instead of $232.40 - equivalent to a 10 per cent reduction - when he joins the scheme from July 1, said the CPF Board.\n\n""The reduction in insurance premiums will help with my family's housing finances in the long term - a penny saved is a penny earned,"" said Ms Peggy Lim, 34, a logistics executive who is a first-time flat owner.\n\nCPF members who are using their CPF savings to pay for their housing loans have to be insured under HPS, said the CPF Board.\n\nThose not using CPF savings to pay for their housing loans can also apply 
for the insurance.\n\nFrom next month, potential home buyers can use the HPS calculator on the CPF website to estimate their new premiums.\n\nFor more information, visit the HPS page on the CPF website or contact the CPF Board, also through the website.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nWorld\n\nLife",2102
226,"More than 540,000 HDB flat owners to pay less in home insurance premiums",https://www.straitstimes.com/singapore/housing/540000-cpf-members-to-pay-less-for-home-protection-insurance,2021-06-24T12:20:39+08:00,"SINGAPORE - More than 540,000 Housing Board (HDB) flat owners will, on average, pay 10 per cent less in premiums for a Central Provident Fund (CPF) home insurance scheme from July 1.\n\nThe CPF Board on Thursday (June 24) said it is reducing premiums for the Home Protection Scheme (HPS) ""due to better-than-expected investment returns and claims experience"".\n\nThe scheme protects CPF members and their families from the risk of losing their HDB flats in the event of death, terminal illness, or total permanent disability before their housing loans are paid up.\n\nLast year, $83.8 million was paid out in claims to home owners insured under HPS.\n\nThe last time a reduction in premiums for the scheme was made was in 2018.\n\nPeriodic reviews are conducted by the CPF Board to ensure that HPS premiums remain affordable, while maintaining the long-term sustainability of the HPS fund, said the CPF.\n\nThe new rates will kick in for members who join the scheme on or after July 1, while existing members will pay the lower prices when they pay their annual premium or adjust their coverage on or after July 1.\n\nFor example, a male member aged 36 with a $200,000 housing loan from HDB for 30 years will pay a reduced annual premium of $209.40 instead of $232.40 - equivalent to a 10 per cent reduction - when he joins the scheme from July 1 this year, said the CPF.\n\n""The reduction in insurance premiums will help with my family's housing finances in the long term - a penny saved is a penny earned,"" said Ms Peggy Lim, 34, a logistics executive who is a first-time flat owner.\n\nCPF members who are using CPF savings to pay for their housing loans have to be insured under HPS, said the CPF.\n\nThose not using their CPF savings to pay for their housing loans can also 
apply for the insurance.\n\nFrom next month, potential home buyers can use the HPS calculator on the CPF website to estimate their new premiums.\n\nFor more information, visit the HPS website or contact CPF Board through the CPF website.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nWorld\n\nLife",2097
378,HDB resale market likely to remain robust this year,https://www.straitstimes.com/singapore/housing/analysis-hdb-resale-market-likely-to-remain-robust-this-year,2022-01-15T05:00:00+08:00,"The new year has begun much as the old one ended with Housing Board flats changing hands at sky-high prices.\n\nLast year, a record 259 HDB resale units sold for over $1 million, more than three times the 82 units in 2020 but the blistering pace of big-buck sales has hardly slackened so far this year.\n\nJust 13 days into 2022, 16 resale units had already changed hands for at least $1 million.\n\nThe 2021 figure was initially reported at 261 but it has been revised to 259 in HDB records.\n\nThere were 36 HDB resale flats sold for at least $1 million last month - the highest monthly figure - despite new property cooling measures kicking in on Dec 16.\n\nBuyers have not missed a beat this month as well, with more than one resale unit being sold for at least $1 million each day on average.\n\nThis begs the million-dollar question: Will the new cooling measures chill the high-priced flat spree?\n\nWith close to 5,000 HDB homes in sought-after areas such as Bukit Merah, Toa Payoh, Queenstown, Clementi and Ang Mo Kio reaching their minimum occupation period (MOP) this year, it is unlikely the million-dollar trend will die down in 2022, although it may increase at a slower pace.\n\nThis is because choice flats in these mature estates, which have seen their fair share of million-dollar units over the years, will continue to draw buyers and thus fetch higher prices, said property analysts.\n\nAfter completing the mandatory five-year MOP, owners can sell their homes on the open market.\n\nHDB data showed that more than 31,000 flats across 16 estates will reach their MOP this year, thus boosting the pool of eligible resale units.\n\nMs Christine Sun, senior vice-president of research and analytics at real estate firm OrangeTee & Tie, said the new cooling measures may not have that big an 
impact on buyers of pricey units.\n\n""Some buyers of million-dollar flats may not be applying for a maximum loan while others could be borrowing from financial institutions. Those who buy million-dollar HDB flats usually have the financial means as well,"" she said.\n\nThe Dec 16 measures tightened the loan-to-value (LTV) limit for housing loans from the HDB from 90 per cent to 85 per cent, among other steps.\n\nThe LTV limit for those borrowing from financial institutions to buy HDB flats remains unchanged at 75 per cent.\n\nMs Sun noted: ""As prices in the private residential market still remain high in the immediate aftermath of the cooling measures, some buyers may decide to buy million-dollar flats instead since their locations are good and are more affordable when compared with private homes in the city fringe.""\n\nBut property observers were quick to point out that million-dollar deals typically form a small portion of HDB resale transaction volumes and are not representative of the overall market.\n\nFor instance, the 259 flats sold for at least a million dollars last year comprise just under 0.9 per cent of the total HDB resale transactions.\n\nProfessor Sing Tien Foo, director of the Institute of Real Estate and Urban Studies at the National University of Singapore, said while million-dollar HDB deals may continue to make headlines, he does not expect sale numbers to increase significantly this year. 
""The number of such deals is likely to remain small, estimated to be below 1 per cent of the annual transaction volume,"" he said.\n\nMr Mohan Sandrasegeran, research and content analyst at real estate agency Ohmyhome, noted that when the 2018 cooling measures kicked in, million-dollar HDB deals dipped slightly, from 71 units in 2018 to 64 in 2019.\n\n""Though we can expect the overall million-dollar trend to continue in 2022, it might be increasing at a slower pace because of the cooling measures in place,"" he said.\n\nHome buyers and market watchers are also keenly observing the HDB resale market for any possible price adjustments, as last month's figures have yet to reflect any impact of the cooling measures.\n\nFlash estimates showed that HDB resale prices rose 12.5 per cent over last year, the highest annual growth recorded since 2010, when prices grew by 14.1 per cent.\n\nFewer resale transactions are expected in the first two months of this year due to the delayed impact of the cooling measures and the typical slowdown in activity during the Chinese New Year, which falls early next month.\n\nHowever, Mr Sandrasegeran said HDB resale demand will likely remain healthy due to the large and steady pool of potential buyers who are primarily first-timers undeterred by the cooling measures.\n\n""The main drivers for the HDB resale market this year will continue to be couples and families who have immediate housing needs and are unable to wait for Build-To-Order (BTO) flats,"" he said.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nWorld\n\nLife",4800
380,HDB resale market likely to remain robust this year,https://www.straitstimes.com/singapore/housing/hdb-resale-market-likely-to-remain-robust-this-year,2022-01-15T05:00:00+08:00,"SINGAPORE -The new year has begun much as the old one ended with Housing Board flats changing hands at sky-high prices.\n\nIn 2021, a record 259 HDB resale units sold for over $1 million, more than three times the 82 units in 2020, but the blistering pace of big-buck sales has hardly slackened so far this year.\n\nJust 13 days into 2022, 16 resale units had already changed hands for at least $1 million.\n\nThe 2021 figure was initially reported at 261 but it has been revised to 259 in HDB records.\n\nThere were 36 HDB resale flats sold for at least $1 million last month - the highest monthly figure - despite new property cooling measures kicking in on Dec 16.\n\nBuyers have not missed a beat this month as well, with more than one resale unit being sold for at least $1 million each day on average.\n\nThis begs the million-dollar question: Will the new cooling measures chill the high-priced flat spree?\n\nWith close to 5,000 HDB homes in sought-after areas such as Bukit Merah, Toa Payoh, Queenstown, Clementi and Ang Mo Kio reaching their minimum occupation period (MOP) this year, it is unlikely the million-dollar trend will die down in 2022, although it may increase at a slower pace.\n\nThis is because choice flats in these mature estates, which have seen their fair share of million-dollar units over the years, will continue to draw buyers and thus fetch higher prices, said property analysts.\n\nAfter completing the mandatory five-year MOP, owners can sell their homes on the open market.\n\nHDB data showed that more than 31,000 flats across 16 estates will reach their MOP this year, thus boosting the pool of eligible resale units.\n\nMs Christine Sun, senior vice-president of research and analytics at real estate firm OrangeTee & Tie, said the new cooling measures may not have that big an 
impact on buyers of pricey units.\n\n""Some buyers of million-dollar flats may not be applying for a maximum loan while others could be borrowing from financial institutions. Those who buy million-dollar HDB flats usually have the financial means as well,"" she said.\n\nThe Dec 16 measures tightened the loan-to-value (LTV) limit for housing loans from the HDB from 90 per cent to 85 per cent, among other steps.\n\nThe LTV limit for those borrowing from financial institutions to buy HDB flats remains unchanged at 75 per cent.\n\nMs Sun noted: ""As prices in the private residential market still remain high in the immediate aftermath of the cooling measures, some buyers may decide to buy million-dollar flats instead since their locations are good and are more affordable when compared with private homes in the city fringe.""\n\nBut property observers were quick to point out that million-dollar deals typically form a small portion of HDB resale transaction volumes and are not representative of the overall market.\n\nFor instance, the 259 flats sold for at least a million dollars last year comprise just under 0.9 per cent of the total HDB resale transactions.\n\nProfessor Sing Tien Foo, director of the Institute of Real Estate and Urban Studies at the National University of Singapore, said while million-dollar HDB deals may continue to make headlines, he does not expect sale numbers to increase significantly this year. 
""The number of such deals is likely to remain small, estimated to be below 1 per cent of the annual transaction volume,"" he said.\n\nMr Mohan Sandrasegeran, research and content analyst at real estate agency Ohmyhome, noted that when the 2018 cooling measures kicked in, million-dollar HDB deals dipped slightly, from 71 units in 2018 to 64 in 2019.\n\n""Though we can expect the overall million-dollar trend to continue in 2022, it might be increasing at a slower pace because of the cooling measures in place,"" he said.\n\nHome buyers and market watchers are also keenly observing the HDB resale market for any possible price adjustments, as last month's figures have yet to reflect any impact of the cooling measures.\n\nFlash estimates showed that HDB resale prices rose 12.5 per cent over last year, the highest annual growth recorded since 2010, when prices grew by 14.1 per cent.\n\nFewer resale transactions are expected in the first two months of this year due to the delayed impact of the cooling measures and the typical slowdown in activity during the Chinese New Year, which falls early next month.\n\nHowever, Mr Sandrasegeran said HDB resale demand will likely remain healthy due to the large and steady pool of potential buyers who are primarily first-timers undeterred by the cooling measures.\n\n""The main drivers for the HDB resale market this year will continue to be couples and families who have immediate housing needs and are unable to wait for Build-To-Order (BTO) flats,"" he said.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nWorld\n\nLife",4810
391,"Upgrading works slated for 56,000 HDB flats",https://www.straitstimes.com/singapore/housing/upgrading-works-slated-for-56000-hdb-flats,2022-02-20T21:00:00+08:00,"They are among the 230,000 units eligible for new features and elderly-friendly fittings\n\nShark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off",579
393,"Upgrading works slated for 56,000 HDB flats",https://www.straitstimes.com/singapore/housing/upgrading-works-start-for-56000-hdb-flats-built-between-1987-and-1997,2022-02-20T13:00:00+08:00,"SINGAPORE - More homes will be improved with better facilities and fittings, with a second batch of about 230,000 Housing Board flats built between 1987 and 1997 eligible for the Home Improvement Programme (HIP).\n\nUpgrading works will start first for about 56,000 flats from this group, the HDB said on Sunday (Feb 20).\n\nAbout two-thirds of the affected households have been asked if they want to proceed with HIP, since polling began in September 2021.\n\nAs at Dec 31 last year, all polled blocks had at least 75 per cent of eligible households voting in favour of the HIP - a pre-requisite before works can start. The start of works for the expanded HIP scheme, 15 years after the programme was launched in 2007, comes as HIP works for almost all the first batch of 320,000 eligible flats built before 1986 are reaching completion.\n\nOf this group, about 81 per cent - about 259,100 flats - have completed upgrading works, while another 17 per cent - about 55,100 flats - are in various stages of upgrading, said HDB.\n\nAs at March 31 last year, the Government has spent about $3.4 billion on the improvement works for the first batch of flats. 
The enhancements aim to improve the quality of life for HDB residents, take care of common maintenance problems and address issues related to an ageing population.\n\nThe programme has three components - essential improvement items, optional items and enhancements for active seniors (Ease).\n\nThe essential improvements, such as repair of spalling concrete or structural cracks, and installation of a retractable clothes drying rack, enhance public health and safety standards and are fully paid for by the Government.\n\nOptional improvement items, such as new entrance doors and gates, are subsidised up to 95 per cent depending on flat type.\n\nThe Ease programme, also heavily subsidised, provides elderly-friendly fittings for the homes, such as grab bars, ramps and slip-resistant treatment to toilet floor tiles.\n\nThese fittings are also offered separately under a direct application route for residents who need them, or whose blocks do not qualify for HIP.\n\nTo enable residents to visualise how the Ease fittings will look in their homes, HDB has introduced an augmented reality feature on the Mobile@HDB app since last July.\n\nUsing their digital devices, residents can scan their home with the app, and virtually place and adjust the selected type of grab bars on the walls, or ramps on the floors.\n\nHDB said that more than 255,500 households have applied for Ease, which began in 2012, as at Dec 31 last year.\n\nOf these, about 160,200 opted for it together with the HIP while 95,300 used the direct application route.\n\nThe Government has spent about $102 million on Ease as at March 31 last year, said HDB.\n\nEase was expanded to include ramp solutions for flats with multi-step entrances in December 2018.\n\nThese have been installed in more than 3,000 homes as at Dec 31 last year.\n\nOf these, about 1,940 customised ramps and 1,070 portable ramps were installed.\n\nAbout 2,080 ramps were installed in homes that had applied directly for Ease. 
The remaining 930 ramps were installed under Ease as part of HIP works.\n\nFor Mr Chua Soo Eng's family, whose upgrades to their four-room flat at Chua Chu Kang were completed on Jan 7 after 10 days of works, the improvements have helped.\n\nHe moved in more than 20 years ago and lives with his wife, 57, a housewife, their 26-year old son and 24-year old daughter. They decided to upgrade both toilets in the flat with new wall and floor tiles, a toilet bowl, a basin and a grab bar.\n\nMr Chua, 57, a businessman in the food and beverage industry, said the toilet now looks much neater and bigger owing to a rearrangement of pipes.\n\nBefore the upgrade, the toilet wall had small tiles that were 15cm by 15cm. Some of the fillings in the crevices had fallen off while some tiles had cracked. It ""looked disgusting and was difficult to wash"", he said.\n\nThe new toilet bowl is also more water-efficient as it requires less flushing than the old one, which had also started to show crack lines, he added.\n\nThe family also opted for a new metal gate for the entrance to their home, with one side of the gate larger than the other. Their old gate had both sides of equal size, which was more inconvenient when carrying large boxes, for example, as both sides then had to be opened, said Mr Chua.\n\n""Overall, the whole family is very happy with it. The 10 days of 'suffering' was all worth it,"" he quipped.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nWorld\n\nLife",4591
395,"Upgrading works slated for 56,000 HDB flats",https://www.straitstimes.com/singapore/housing/upgrading-works-slated-for-56000-hdb-flats1,2022-02-20T05:00:00+08:00,Shark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off,489
457,HDB compensates BTO flat buyers for delay,https://www.straitstimes.com/singapore/housing/hdb-compensates-bto-flat-buyers-for-delay,2022-05-23T05:00:00+08:00,"Another Build-To-Order (BTO) project - Anchorvale Village in Sengkang - has exceeded the legal completion date, and buyers will be compensated by the Housing Board.\n\nThe project, which has 207 units across two blocks, is now expected to be finished between May and July next year, which is a delay of about four to six months beyond the delivery possession date - the legal contractual date by which HDB is required to hand over the keys.\n\nIn response to queries from The Straits Times, HDB said all buyers will receive the maximum reimbursement sum that they are eligible for, without needing to submit any claims.\n\nOf the 207 two-room flexi and three-room units, 200 have been booked.\n\nThe buyers can expect to be reimbursed between $2,270 and $6,360, based on current estimates of the delay, in a one-off payment about two months after key collection, said HDB.\n\nThose who get their keys in May 2023 will get between $2,270 and $4,240, while those who get theirs in July 2023 will receive between $3,400 and $6,360.\n\nAnchorvale Village was originally slated for completion in the first quarter of this year when it was launched for sale in November 2017.\n\nFactoring in the fresh delays, this brings the total waiting time to about 5.2 years, or about 62 months.\n\nThe first BTO project to exceed its delivery possession date was Waterway Sunrise II in Punggol, which has 1,014 units across seven blocks, after its then main contractor ran into financial difficulties and had to cease operations.\n\nOf the 98 BTO projects currently under construction, Anchorvale Village and Waterway Sunrise II are the only two to have incurred a delay of more than one year and exceeded their delivery possession date, said HDB.\n\nDelays for most BTO projects have been kept to between six and 12 months, after government support and 
assistance measures were rolled out, said HDB.\n\nIt told ST that Anchorvale Village faces significant challenges that have impacted its construction progress more compared with the other delayed BTO projects.\n\nFor instance, the project, which is integrated with a neighbourhood centre, is located on a built-up site surrounded by existing developments.\n\nThe site along Anchorvale Road sits across the road from Sengkang Riverside Park and Sengkang Sports Centre.\n\n""The tight construction site and more complex design with the integration of commercial and residential components posed additional constraints and challenges,"" said HDB.\n\nChallenges from the pandemic also hurt progress.\n\nThe main contractor Ken-Pal had just begun work on the basement carpark when all construction work had to be halted during the two-month circuit breaker in 2020.\n\nWhen work resumed, progress was impeded by manpower and material shortages, as well as the stringent Covid-19 safe management measures at the work site.\n\nIn December 2020, buyers were informed of a nine-month delay, with completion pushed to the fourth quarter of 2022.\n\nProgress on the super-structural phase of construction - the portion of a building that is constructed above ground level - was also hindered by frequent rain in March and April this year and operational hurdles.\n\nHDB said it conducted a detailed evaluation of the project's progress and timelines last month, and assessed that it will no longer be possible to complete work by the delivery possession date of Jan 31, 2023.\n\nFlat buyers were notified of the fresh delays earlier this month via e-mails and letters sent by HDB and seen by ST.\n\nTrade finance officer Jasmine Goh, 26, who bought a three-room unit in Anchorvale Village with her fiance, said the delay did not come as a surprise, based on the work progress they had observed in the past year.\n\n""We compared the progress of our project with our friend's BTO project that has the same 
completion period, and ours seemed to be lagging behind,"" she added.\n\nMs Goh, who is expecting her first child at the year end, has applied to HDB for a rental flat while waiting for her new home.\n\n""At one point, we did consider cancelling our BTO flat purchase because of the long wait, but we eventually decided not to. Because of our... age, we can still afford to wait,"" she said.\n\nWith borders reopening and more workers entering Singapore, HDB said manpower constraints have eased significantly in recent months, with almost all BTO sites nearing full manpower capacity.\n\nBut there are still some uncertainties such as the Ukraine-Russia conflict and the Covid-19 situation in China, which may impact material prices and further disrupt the global supply chain, it added.\n\nSwipe. Select. Stay informed.\n\nSingapore\n\nSingapore\n\nAsia\n\nSingapore\n\nSingapore\n\nSingapore\n\nAsia\n\nLife",4664


In [111]:
# The same story can appear under slightly different URLs, which leaves
# several rows sharing one title. Keep a single row per title.

# Coerce the two ranking columns to proper dtypes; values that cannot be
# parsed become NaT / NaN instead of raising.
df['published_date'] = pd.to_datetime(df['published_date'], errors='coerce')
df['content_chars'] = pd.to_numeric(df['content_chars'], errors='coerce')

# Rank rows so the preferred one for every title is encountered first:
#   - newest published_date first
#   - on equal dates, the row with more content_chars first
ranking_columns = ['published_date', 'content_chars']
df = df.sort_values(by=ranking_columns, ascending=[False, False])

# Keep the first (preferred) row per title, then renumber the index from 0.
deduplicated = df.drop_duplicates(subset=['title'], keep='first')
df = deduplicated.reset_index(drop=True)

In [112]:
# Make the timestamps tz-naive before exporting (pandas' Excel writer
# rejects timezone-aware datetimes), then write the merged table.
naive_published = df['published_date'].dt.tz_localize(None)
df['published_date'] = naive_published
df.to_excel(
    'Output/ST/merged_st_articles_2020_Jan_to_2025_Oct.xlsx',
    index=False,
)

# patch the scraped data

In [72]:
import pandas as pd
import re

In [92]:
# load the merged articles workbook (same path the merge step exports to)
df = pd.read_excel('Output/ST/merged_st_articles_2020_Jan_to_2025_Oct.xlsx')

In [93]:
# print row count, column dtypes and non-null counts of the loaded table
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 985 entries, 0 to 984
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   title           985 non-null    object        
 1   url             985 non-null    object        
 2   published_date  985 non-null    datetime64[ns]
 3   content         985 non-null    object        
 4   content_chars   985 non-null    int64         
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 38.6+ KB


In [94]:
# Manual patch for one article: point its row at a different URL for the
# same story and set the published timestamp to match.
#   from: .../upgrading-works-slated-for-56000-hdb-flats
#   to:   .../upgrading-works-start-for-56000-hdb-flats-built-between-1987-and-1997
old_url = 'https://www.straitstimes.com/singapore/housing/upgrading-works-slated-for-56000-hdb-flats'
new_url = 'https://www.straitstimes.com/singapore/housing/upgrading-works-start-for-56000-hdb-flats-built-between-1987-and-1997'
new_published = pd.to_datetime("2022-02-20 13:00:00")

# step 1: swap the URL on the row(s) that still carry the old one
df.loc[df['url'] == old_url, 'url'] = new_url

# step 2: the mask is recomputed, so it picks up the row(s) renamed in step 1
df.loc[df['url'] == new_url, 'published_date'] = new_published


In [95]:
# show full cell text instead of truncating long strings
pd.set_option('display.max_colwidth', None)

# print the scraped content of one article, selected by exact title
is_target_title = df['title'] == "BTO sales: Over 5,700 flats on offer"
target_content = df.loc[is_target_title, 'content']
print(target_content.to_string(index=False))


An artist's impression of ParkView @ Bidadari.\n\nAlready a subscriber? Log in\n\nGet full access to trusted news\n\nUnlock every story, analysis and exclusive from Singapore’s most established newsroom\n\nEnjoy these subscriber benefits\n\nFranklin Sports\n\n$49.83\n\n$76.92 35% off\n\nFossil\n\n$103.98\n\n$235.00 56% off\n\nMiffy\n\n$10.00\n\n$29.90 67% off\n\nOrico\n\n$21.00\n\n$40.00 48% off\n\nJo Malone\n\nFree Gifts\n\n$78.00\n\nPhilips\n\n$28.49\n\n$49.00 42% off\n\nAdidas\n\n$44.90\n\n$69.00 35% off\n\nHydroJug\n\n$29.84\n\n$56.07 47% off\n\nCornell\n\n$39.80\n\n$129.00 69% off\n\nVanish\n\n$15.00\n\n$19.10 21% off\n\nAkemi\n\n$16.07\n\n$38.00 58% off\n\n&honey\n\n$20.28\n\n$33.80 40% off\n\nMiniso\n\n$5.90\n\n$7.91 25% off\n\nHatchimals\n\n$12.78\n\n$31.81 60% off\n\nTiger\n\n$157.00\n\n$399.00 61% off


In [96]:
# Flag articles whose scraped "content" is really the shopping-deal widget.
# Each alternative is the first entry of one of the two widget variants seen
# in the data. The groups are non-capturing: str.contains() only needs a
# yes/no answer, and capturing groups make pandas emit a
# "pattern ... has match groups" UserWarning.
pattern = (
    r"(?:Shark\s*\$329\.00\s*\$649\.00\s*49% off)"
    r"|"
    r"(?:Franklin Sports\s*\$49\.83)"
)

# na=False: rows with missing content are treated as "not matching".
mask = df['content'].str.contains(pattern, case=False, regex=True, na=False)

# Rows that need to be re-scraped.
error_df = df.loc[mask]
len(error_df)


  mask = df['content'].str.contains(pattern, case=False, regex=True, na=False)


8

In [97]:
# display the rows whose content matched the ad-widget pattern
error_df

Unnamed: 0,title,url,published_date,content,content_chars
423,BTO flats priced for various budgets,https://www.straitstimes.com/singapore/housing/housing-bto-flats-priced-for-various-budgets,2022-10-18 05:00:00,Shark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off\n\nNew Subscriber Exclusive\n\nUnwrap more than just news\n\nSubscribe to ST All-Digital + Print for $29.90/month and enjoy $180 off your Takashimaya shopping\n\n24-month contract | T&Cs apply,674
432,Smaller flats' resale prices tracked,https://www.straitstimes.com/singapore/housing/hdb-smaller-flats-resale-prices-tracked,2022-10-09 05:00:00,Shark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off\n\nNew Subscriber Exclusive\n\nUnwrap more than just news\n\nSubscribe to ST All-Digital + Print for $29.90/month and enjoy $180 off your Takashimaya shopping\n\n24-month contract | T&Cs apply,674
457,266 million-dollar resales so far this year,https://www.straitstimes.com/singapore/hdb-266-million-dollar-resales-so-far-this-year,2022-09-26 05:00:00,Shark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off\n\nNew Subscriber Exclusive\n\nUnwrap more than just news\n\nSubscribe to ST All-Digital + Print for $29.90/month and enjoy $180 off your Takashimaya shopping\n\n24-month contract | T&Cs apply,674
466,"HDB, condo rents rise again",https://www.straitstimes.com/singapore/housing/housing-hdb-condo-rents-rise-again,2022-09-15 05:00:00,Shark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off\n\nNew Subscriber Exclusive\n\nUnwrap more than just news\n\nSubscribe to ST All-Digital + Print for $29.90/month and enjoy $180 off your Takashimaya shopping\n\n24-month contract | T&Cs apply,674
520,Revised occupation period rule,https://www.straitstimes.com/singapore/housing/sers-%E2%80%A2-revised-occupation-period-rule,2022-06-18 05:00:00,Shark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off\n\nNew Subscriber Exclusive\n\nUnwrap more than just news\n\nSubscribe to ST All-Digital + Print for $29.90/month and enjoy $180 off your Takashimaya shopping\n\n24-month contract | T&Cs apply,674
595,"Upgrading works slated for 56,000 HDB flats",https://www.straitstimes.com/singapore/housing/upgrading-works-start-for-56000-hdb-flats-built-between-1987-and-1997,2022-02-20 13:00:00,"They are among the 230,000 units eligible for new features and elderly-friendly fittings\n\nShark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off",579
806,Minister calls for prudence,https://www.straitstimes.com/singapore/housing/minister-calls-for-prudence,2021-02-24 05:00:00,Shark\n\n$329.00\n\n$649.00 49% off\n\nWMF\n\n$23.99\n\n$49.00 51% off\n\nSkater\n\n$8.73\n\n$19.00 54% off\n\nTefal\n\n$78.99\n\n$169.00 53% off\n\nKate Spade\n\n$14.98\n\n$28.00 47% off\n\nUgreen\n\n$37.98\n\n$62.99 40% off\n\nDyson\n\n$379.00\n\n$549.00 31% off\n\nSukin\n\n$13.30\n\n$36.00 63% off\n\nOwala\n\n$30.11\n\n$48.00 37% off\n\nDuracell\n\n$11.40\n\n$19.01 40% off\n\nKipling\n\n$29.98\n\n$40.43 26% off\n\nBodum\n\n$19.98\n\n$36.80 46% off\n\nLego\n\n$11.90\n\n$14.90 20% off\n\nUSAopoly\n\n$33.63\n\n$66.00 49% off\n\nCharlie Mackesy\n\n$23.28\n\n$59.00 61% off,489
855,"BTO sales: Over 5,700 flats on offer",https://www.straitstimes.com/singapore/housing/over-5700-flats-on-offer,2020-11-18 05:00:00,"An artist's impression of ParkView @ Bidadari.\n\nAlready a subscriber? Log in\n\nGet full access to trusted news\n\nUnlock every story, analysis and exclusive from Singapore’s most established newsroom\n\nEnjoy these subscriber benefits\n\nFranklin Sports\n\n$49.83\n\n$76.92 35% off\n\nFossil\n\n$103.98\n\n$235.00 56% off\n\nMiffy\n\n$10.00\n\n$29.90 67% off\n\nOrico\n\n$21.00\n\n$40.00 48% off\n\nJo Malone\n\nFree Gifts\n\n$78.00\n\nPhilips\n\n$28.49\n\n$49.00 42% off\n\nAdidas\n\n$44.90\n\n$69.00 35% off\n\nHydroJug\n\n$29.84\n\n$56.07 47% off\n\nCornell\n\n$39.80\n\n$129.00 69% off\n\nVanish\n\n$15.00\n\n$19.10 21% off\n\nAkemi\n\n$16.07\n\n$38.00 58% off\n\n&honey\n\n$20.28\n\n$33.80 40% off\n\nMiniso\n\n$5.90\n\n$7.91 25% off\n\nHatchimals\n\n$12.78\n\n$31.81 60% off\n\nTiger\n\n$157.00\n\n$399.00 61% off",724


In [98]:
# function to rescrape article

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time

def init_browser(login_url="https://www.straitstimes.com/"):
    """
    Start a Chrome session and block until the user confirms a manual login.

    Parameters
    ----------
    login_url : str, optional
        Page opened for the manual login. Defaults to the ST homepage.

    Returns
    -------
    selenium.webdriver.Chrome
        The running browser session; the caller is responsible for
        eventually calling ``driver.quit()``.

    Raises
    ------
    Whatever ``driver.get`` or ``input`` raises (including
    KeyboardInterrupt). The browser is closed before the error propagates,
    so an aborted login does not leave an orphaned Chrome window behind.
    """
    chrome_options = Options()
    chrome_options.add_argument("--start-maximized")
    # NOTE(review): presumably meant to make the session look less like an
    # automated browser to the site — confirm it is still needed.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")

    driver = webdriver.Chrome(options=chrome_options)

    try:
        # Open the login page and hand control to the user.
        driver.get(login_url)

        print("🔐 Please login manually in the Chrome window...")
        input("➡️  Press ENTER here after you have successfully logged in...")
    except BaseException:
        # The driver is only referenced locally at this point; without this
        # the Chrome process would outlive the failed call.
        driver.quit()
        raise

    # Nothing is verified here: pressing ENTER is taken as confirmation.
    print("✅ Login detected. Browser session ready.")
    return driver

# Initialize once
# (opens Chrome and blocks on input() until ENTER is pressed; the resulting
# session is reused for every re-scrape below)
driver = init_browser()

from bs4 import BeautifulSoup
import pandas as pd
import time

def patch_st_article_content(df, url, driver, content_column="content"):
    """
    Re-scrape a Straits Times article using Selenium (to bypass paywall)
    and update dataframe in-place.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "url" column. Every row whose url equals `url`
        gets its `content_column` overwritten.
    url : str
        Address of the article to load in the browser.
    driver : selenium WebDriver
        Browser session used to fetch the page; only ``driver.get`` and
        ``driver.page_source`` are used.
    content_column : str, optional
        Name of the column that receives the re-scraped text.

    Returns
    -------
    str or None
        The re-scraped text. None when the page failed to load, the article
        wrapper was missing, or no non-empty text was found; `df` is left
        untouched in those cases.
    """

    print(f"\nRe-scraping: {url}")

    try:
        driver.get(url)
        time.sleep(2)  # allow page to load fully after paywall gate
        html = driver.page_source

    except Exception as e:
        # Best effort: report the failure and let the caller move on.
        print("❌ Selenium error while loading page:", e)
        return None

    soup = BeautifulSoup(html, "lxml")

    # Extract article wrapper
    wrapper = soup.select_one("div.storyline-wrapper.default")

    if wrapper is None:
        print("❌ storyline-wrapper not found. Page may still be behind paywall.")
        return None

    # One text chunk per archival-wrapper div. Blank chunks are dropped so
    # that a page with only empty wrappers is reported as "no text" instead
    # of overwriting the stored content with empty paragraphs.
    chunk_texts = (
        div.get_text(" ", strip=True)
        for div in wrapper.select("div.archival-wrapper")
    )
    paragraphs = [text for text in chunk_texts if text]

    if not paragraphs:
        print("❌ No archival-wrapper text found.")
        return None

    scraped_content = "\n\n".join(paragraphs)

    # Update df
    # NOTE(review): only `content_column` is rewritten; a derived length
    # column such as content_chars (if present) keeps its old value —
    # confirm whether it should be refreshed as well.
    mask = df["url"] == url
    updated_rows = mask.sum()

    if updated_rows > 0:
        df.loc[mask, content_column] = scraped_content
        print(f"✅ Updated {updated_rows} row(s).")
    else:
        print("⚠️ URL not found in dataframe.")

    return scraped_content


🔐 Please login manually in the Chrome window...


➡️  Press ENTER here after you have successfully logged in... 


✅ Login detected. Browser session ready.


In [99]:
# distinct, non-null URLs of the rows flagged in error_df
urls_to_patch = error_df['url'].dropna().unique()

# re-scrape each flagged article through the logged-in browser session,
# pausing between requests (SLEEP_BETWEEN_ARTICLES is defined elsewhere in the file)
# NOTE: patch_st_article_content returns None on failure; the return value
# is not checked here, so failures only show up in the printed log
for url in urls_to_patch:
    patch_st_article_content(df, url, driver=driver)
    time.sleep(SLEEP_BETWEEN_ARTICLES)



Re-scraping: https://www.straitstimes.com/singapore/housing/housing-bto-flats-priced-for-various-budgets
✅ Updated 1 row(s).

Re-scraping: https://www.straitstimes.com/singapore/housing/hdb-smaller-flats-resale-prices-tracked
✅ Updated 1 row(s).

Re-scraping: https://www.straitstimes.com/singapore/hdb-266-million-dollar-resales-so-far-this-year
✅ Updated 1 row(s).

Re-scraping: https://www.straitstimes.com/singapore/housing/housing-hdb-condo-rents-rise-again
✅ Updated 1 row(s).

Re-scraping: https://www.straitstimes.com/singapore/housing/sers-%E2%80%A2-revised-occupation-period-rule
✅ Updated 1 row(s).

Re-scraping: https://www.straitstimes.com/singapore/housing/upgrading-works-start-for-56000-hdb-flats-built-between-1987-and-1997
✅ Updated 1 row(s).

Re-scraping: https://www.straitstimes.com/singapore/housing/minister-calls-for-prudence
✅ Updated 1 row(s).

Re-scraping: https://www.straitstimes.com/singapore/housing/over-5700-flats-on-offer
✅ Updated 1 row(s).


In [102]:
# write the dataframe to Excel without the index column
df.to_excel('Output/ST/final_st_articles_2020_Jan_to_2025_Oct.xlsx', index=False)

# clean scraped data

In [29]:
import pandas as pd
import re

In [30]:
# load the previously saved "final" articles file as the input for cleaning
df = pd.read_excel('Output/ST/final_st_articles_2020_Jan_to_2025_Oct.xlsx')

In [39]:
# Lift the column-width cap so long article text is printed in full.
pd.set_option('display.max_colwidth', None)

# Dump the stored content of a single article, looked up by its exact title.
is_water_tank_article = df['title'] == "Water tank not tampered with, but probe ongoing"
print(df.loc[is_water_tank_article, 'content'].to_string(index=False))



In [32]:
# remove irrelevant phrases

# Boilerplate fragments (section labels, the subscriber/paywall prompt and
# "SEE ..." pointers) that are stripped wherever they occur in the text.
# NOTE(review): the short section labels ("Singapore\n\n", "Life\n\n", ...)
# are matched anywhere, so a paragraph that merely ends with one of those
# words is trimmed too — confirm this is acceptable.
lines_to_remove = ["Swipe. Select. Stay informed.",
    "Singapore\n\n",
    "World\n\n",
    "Business\n\n",
    "\n\nBusiness",
    "Asia\n\n",
    "Life\n\n",
    "Already a subscriber? Log in\n\nGet full access to trusted news\n\nUnlock every story, analysis and exclusive from Singapore’s most established newsroom\n\nEnjoy these subscriber benefits",
    "SEE THE BIG STORY", "SEE TOP OF THE NEWS", "SEE HOME",
]

# escape every phrase so it is matched literally, then OR them together
pattern = "|".join(re.escape(line) for line in lines_to_remove)

df['content'] = (
    df['content']
    .str.replace(pattern, "", regex=True)
    #.str.replace(r"\s+", " ", regex=True)  # clean extra spaces
    .str.strip()
)

# remove source-attributed datelines such as "SINGAPORE (BLOOMBERG) - ".
# These must be matched literally (regex=False): read as a regex, the
# parentheses form a group, so the literal "( ... )" text would never match.
for dateline in (
    "SINGAPORE ( THE NEW PAPER ) - ",
    "SINGAPORE ( THE BUSINESS TIMES ) - ",
    "SINGAPORE (BLOOMBERG) - ",
):
    df['content'] = df['content'].str.replace(dateline, '', regex=False)

# remove "SINGAPORE – " or "SINGAPORE - " at the START of the string
df['content'] = df['content'].str.replace(r'^(SINGAPORE\s*[–-]\s*)', '', regex=True)


In [42]:
# all articles are of Singapore context

In [35]:
# write the cleaned dataframe to a separate Excel file, without the index column
df.to_excel('Output/ST/cleaned_st_articles_2020_Jan_to_2025_Oct.xlsx', index=False)