In [1]:
import json
import requests
import csv
import os
from bs4 import BeautifulSoup
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

def getNewsData(url, timeout=30):
    """Scrape one Google News results page into a list of article records.

    Args:
        url: Full URL of the Google News search results page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would hang the notebook indefinitely.

    Returns:
        A list of dicts, one per result card, with the keys ``link``,
        ``title``, ``snippet``, ``date`` and ``source``. A value is ``None``
        when the corresponding element is absent from the card.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status (e.g. when the request is rate-limited), instead of
            silently returning an empty list.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    def _text_of(card, selector):
        # select_one returns None when nothing matches; don't call get_text on it.
        node = card.select_one(selector)
        return node.get_text() if node is not None else None

    news_results = []

    # NOTE(review): the CSS class names below are hard-coded; presumably they
    # mirror Google's current markup and will need updating when it changes.
    for el in soup.select("div.SoaBEf"):
        anchor = el.find("a")
        news_results.append(
            {
                "link": anchor.get("href") if anchor is not None else None,
                "title": _text_of(el, "div.MBeuO"),
                "snippet": _text_of(el, ".GI74Re"),
                "date": _text_of(el, ".LfVVr"),
                "source": _text_of(el, ".NUnG9d span"),
            }
        )

    # Echo the scraped records so the cell output shows what was collected.
    print(json.dumps(news_results, indent=2))
    return news_results


def savetoCSV(filename, url_page1, url_page2, url_page3):
    """Scrape three result pages and append their records to a CSV file.

    Each URL is passed to ``getNewsData`` and the returned records are
    appended to ``filename``. The header row is written only when the file is
    missing or empty, so repeated appends never duplicate it.

    Args:
        filename: Path of the CSV file to create or append to.
        url_page1: URL of the first results page.
        url_page2: URL of the second results page.
        url_page3: URL of the third results page.

    Note:
        The file is opened in append mode, so calling this again with the
        same ``filename`` adds the rows a second time.
    """
    fieldnames = ["link", "title", "snippet", "date", "source"]

    # Scrape and append each page
    for url in (url_page1, url_page2, url_page3):
        news_results = getNewsData(url)

        # A zero-byte file that already exists still needs its header row;
        # checking existence alone would leave such a file without one.
        needs_header = (
            not os.path.isfile(filename) or os.path.getsize(filename) == 0
        )

        with open(filename, "a", newline="", encoding="utf-8") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            if needs_header:
                writer.writeheader()  # Write header only once
            writer.writerows(news_results)

    print(f"Data appended to {filename}")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Scrape configuration: Keppel DC REIT (AJBU)
filename = "ajbu_news_data.csv"

# The three result pages differ only in their `start` offset.
url_page1, url_page2, url_page3 = [
    f"https://www.google.com/search?q=keppel+DC+reit&num=100&gl=sg&tbm=nws&start={offset}"
    for offset in (0, 100, 200)
]

savetoCSV(filename, url_page1, url_page2, url_page3)

[
  {
    "link": "https://sbr.com.sg/news/keppel-dc-reit-obtains-728m-loan-facility",
    "title": "Keppel DC REIT obtains $72.8m loan facility",
    "snippet": "Keppel DC REIT's subsidiary, Keppel DC REIT Fin. Company Private Limited obtained a $72.8m (EUR50m) loan facility on 18 March 2025.",
    "date": "4 hours ago",
    "source": "Singapore Business Review"
  },
  {
    "link": "https://www.theedgesingapore.com/news/reits/keppel-dc-reit-group-obtains-loan-facility-eur50-mil",
    "title": "Keppel DC REIT group obtains loan facility of EUR50 mil",
    "snippet": "Keppel DC REIT group has obtained a loan facility of EUR50 million ($72.83 million) on March 18, according to a bourse filing.",
    "date": "21 hours ago",
    "source": "The Edge Singapore"
  },
  {
    "link": "https://thesmartinvestor.com.sg/5-potential-singapore-blue-chip-stocks-that-pay-a-dividend-for-2025/",
    "title": "5 Potential Singapore Blue-Chip Stocks that Pay a Dividend for 2025",
    "snippet": "Looking 

In [3]:
# Scrape configuration: First REIT (AW9U)
filename = "aw9u_news_data.csv"

# The three result pages differ only in their `start` offset.
url_page1, url_page2, url_page3 = [
    f"https://www.google.com/search?q=First+REIT&num=100&gl=sg&tbm=nws&start={offset}"
    for offset in (0, 100, 200)
]

savetoCSV(filename, url_page1, url_page2, url_page3)


[
  {
    "link": "https://www.mingtiandi.com/real-estate/finance/cvcs-siloam-makes-offer-for-first-reits-indonesia-hospitals/",
    "title": "CVC-Backed Hospital Chain Makes Offer for First REIT\u2019s $599M in Indonesian Assets",
    "snippet": "A potential deal on the way to sell 14 Indonesian hospitals valued at S$820.5 million ($599.3 million) to its private equity-backed tenant.",
    "date": "14 Jan 2025",
    "source": "Mingtiandi"
  },
  {
    "link": "https://www.theedgesingapore.com/billion-dollar-club/billion-dollar-club-2024/first-reit-outlines-blueprint-strategic-resilience",
    "title": "First REIT outlines blueprint for strategic resilience in dynamic healthcare sector",
    "snippet": "\u201cOperationally, First REIT's portfolio of high-quality healthcare properties continue to deliver sustainable performance in the first half of...",
    "date": "14 Nov 2024",
    "source": "The Edge Singapore"
  },
  {
    "link": "https://www.reuters.com/markets/asia/singapores-fir

In [None]:
# Scrape configuration: Parkway Life REIT (C2PU)
filename = "c2pu_news_data.csv"

# The three result pages differ only in their `start` offset.
url_page1, url_page2, url_page3 = [
    f"https://www.google.com/search?q=parkway+life+REIT&num=100&gl=sg&tbm=nws&start={offset}"
    for offset in (0, 100, 200)
]

savetoCSV(filename, url_page1, url_page2, url_page3)

[
  {
    "link": "https://thesmartinvestor.com.sg/4-singapore-reits-that-can-supply-you-with-dividends-for-life/",
    "title": "4 Singapore REITs That Can Supply You with Dividends for Life",
    "snippet": "We shine the spotlight on four attractive Singapore REITs that can supply you with a lifetime of passive income.",
    "date": "20 Jan 2025",
    "source": "The Smart Investor"
  },
  {
    "link": "https://www.businesstimes.com.sg/companies-markets/maybank-initiates-parkway-life-reit-coverage-buy-call-s4-10-price-target-strong-growth-prospects",
    "title": "Maybank initiates Parkway Life Reit coverage with \u2018buy\u2019 call, S$4.10 price target, on strong growth prospects",
    "snippet": "Maybank initiates Parkway Life Reit coverage with 'buy' call, S$4.10 price target, on strong growth prospects \u00b7 OCBC says healthcare S-Reits...",
    "date": "9 Jan 2025",
    "source": "The Business Times"
  },
  {
    "link": "https://drwealth.com/parkway-life-reit-buys-nursing-hom

In [5]:
# Scrape configuration: CapitaLand Ascott Trust (HMN)
filename = "hmn_news_data.csv"

# The three result pages differ only in their `start` offset.
url_page1, url_page2, url_page3 = [
    f"https://www.google.com/search?q=capitaland+ascott+trust&num=100&gl=sg&tbm=nws&start={offset}"
    for offset in (0, 100, 200)
]

savetoCSV(filename, url_page1, url_page2, url_page3)

[
  {
    "link": "https://www.businesstimes.com.sg/companies-markets/capitaland-investment-sells-4-9-stake-capitaland-ascott-trust-s162-million",
    "title": "CapitaLand Investment sells 4.9% stake in CapitaLand Ascott Trust for S$162 million",
    "snippet": "CapitaLand Investment sells 4.9% stake in CapitaLand Ascott Trust for S$162 million \u00b7 CLI to acquire Australian credit investment manager...",
    "date": "19 Dec 2024",
    "source": "The Business Times"
  },
  {
    "link": "https://www.capitaland.com/en/about-capitaland/newsroom/news-releases/international/2025/january/capitaland-ascott-trust-increases-2h-2024-gross-profit-by-8--fro.html",
    "title": "CapitaLand Ascott Trust increases 2H 2024 gross profit by 8% from stronger operating performance and acquisitions",
    "snippet": "CapitaLand Ascott Trust (CLAS) increased its 2H 2024 gross profit by 8% year-on-year (yoy) to S$198.0 million compared to 2H 2023.",
    "date": "1 month ago",
    "source": "CapitaLand"
  }

In [6]:
# Scrape configuration: AIMS APAC REIT (O5RU)
filename = "o5ru_news_data.csv"

# The three result pages differ only in their `start` offset.
url_page1, url_page2, url_page3 = [
    f"https://www.google.com/search?q=aims+apac+reit&num=100&gl=sg&tbm=nws&start={offset}"
    for offset in (0, 100, 200)
]

savetoCSV(filename, url_page1, url_page2, url_page3)


[
  {
    "link": "https://www.reitsweek.com/2025/03/aims-apac-reit-raises-sgd125-million-through-perpetual-securities-issuance.html",
    "title": "AIMS APAC REIT raises SGD125 million through perpetual securities issuance",
    "snippet": "AIMS APAC REIT (AA REIT) has announced the successful issuance of SGD125 million in 4.70% subordinated perpetual securities.",
    "date": "19 hours ago",
    "source": "reitsweek"
  },
  {
    "link": "https://www.tipranks.com/news/company-announcements/aims-apac-reit-issues-s125-million-perpetual-securities",
    "title": "AIMS APAC REIT Issues S$125 Million Perpetual Securities",
    "snippet": "AIMS APAC REIT ( ($SG:O5RU) ) has issued an announcement. AIMS APAC REIT has announced the issuance of S$125 million in 4.70% subordinated...",
    "date": "20 hours ago",
    "source": "TipRanks"
  },
  {
    "link": "https://growbeansprout.com/aims-apac-reit-aareit-9m25-dividend",
    "title": "AIMS APAC REIT: Strong rent reversions in Singapore",
    

In [7]:
# Scrape configuration: Mapletree Industrial Trust (ME8U)
filename = "me8u_news_data.csv"

# The three result pages differ only in their `start` offset.
url_page1, url_page2, url_page3 = [
    f"https://www.google.com/search?q=mapletree+industrial+trust&num=100&gl=sg&tbm=nws&start={offset}"
    for offset in (0, 100, 200)
]

savetoCSV(filename, url_page1, url_page2, url_page3)

[
  {
    "link": "https://www.businesstimes.com.sg/companies-markets/mapletree-industrial-trust-posts-1-5-rise-q3-dpu-s0-0341",
    "title": "Mapletree Industrial Trust posts 1.5% rise in Q3 DPU to S$0.0341",
    "snippet": "Tan Nai Lun ... THE distribution per unit (DPU) for Mapletree Industrial Trust : ME8U +0.96% (MIT) rose 1.5 per cent to S$0.0341 for its third...",
    "date": "1 month ago",
    "source": "The Business Times"
  },
  {
    "link": "https://drwealth.com/mapletree-industrial-drops-3-after-jpmorgan/",
    "title": "Mapletree Industrial Drops 3% After JPMorgan Downgrade\u2014Time to Worry?",
    "snippet": "As if the current plight of REITs wasn't bad enough, JPMorgan analysts added insult to injury by downgrading Mapletree Industrial Trust...",
    "date": "1 month ago",
    "source": "Dr Wealth"
  },
  {
    "link": "https://thesmartinvestor.com.sg/better-buy-mapletree-industrial-trust-vs-mapletree-logistics-trust/",
    "title": "Better Buy: Mapletree Industrial Tr

In [8]:
# Normalise the scraped "date" column to YYYY-MM-DD.
# The scraped value is either relative ("5 mins ago", "3 hours ago", "2 days ago",
# "1 week ago", "4 months ago", "a year ago") or absolute ("14 Jan 2025").

import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
import re
import os

def convert_date(date_str, today=None):
    """Convert a scraped news date into ``YYYY-MM-DD`` form.

    Handles two input shapes:

    * relative ages such as ``"5 mins ago"``, ``"1 minute ago"``,
      ``"3 hours ago"``, ``"2 weeks ago"`` or ``"a year ago"``, which are
      subtracted from ``today``;
    * absolute dates such as ``"14 Jan 2025"`` or ``"3 September 2024"``.

    Anything else (including values already in ``YYYY-MM-DD`` form) is
    returned unchanged, so the function can be re-applied to a converted
    column without altering it.

    Args:
        date_str: The raw date value; it is converted with ``str()`` and
            stripped before matching.
        today: Reference ``datetime`` that relative ages are subtracted from.
            Defaults to ``datetime.today()`` at call time; pass the scrape
            date for reproducible results.

    Returns:
        The date as a ``YYYY-MM-DD`` string, or the stripped input string when
        no known pattern matches.
    """
    date_str = str(date_str).strip()
    if today is None:
        today = datetime.today()

    # Relative formats: "<n> <unit>[s] ago", where <n> may also be "a"/"an"
    # and the minute unit may be spelled min/mins/minute/minutes.
    match = re.match(
        r'(\d+|an?)\s+(min(?:ute)?|hour|day|week|month|year)s?\s+ago',
        date_str,
        re.IGNORECASE,
    )
    if match:
        quantity = match.group(1).lower()
        unit = match.group(2).lower()

        # Minute-level ages are reported as the reference date itself.
        if unit.startswith('min'):
            return today.strftime('%Y-%m-%d')

        num = 1 if quantity in ('a', 'an') else int(quantity)
        # relativedelta takes plural keyword names: hours, days, weeks, ...
        delta = relativedelta(**{unit + 's': num})
        return (today - delta).strftime('%Y-%m-%d')

    # Absolute format: "<day> <month name> <year>"
    parts = date_str.split()
    if len(parts) == 3:
        day, month, year = parts
        month_truncated = month[:3].capitalize()  # First 3 letters, e.g. "Jan"
        try:
            date_obj = datetime.strptime(f"{day} {month_truncated} {year}", '%d %b %Y')
        except ValueError:
            # Three words that are not a date: leave the value untouched
            # rather than aborting the whole column conversion.
            return date_str
        return date_obj.strftime('%Y-%m-%d')

    # Return original string if no patterns matched
    return date_str


# CSV files written by the scraping cells, one per ticker.
input_path = [
    'ajbu_news_data.csv',
    'aw9u_news_data.csv',
    'c2pu_news_data.csv',
    'hmn_news_data.csv',
    'o5ru_news_data.csv',
    'me8u_news_data.csv',
]

# Rewrite each file in place with its "date" column passed through convert_date.
for file in input_path:
    df = pd.read_csv(file)
    df['date'] = df['date'].map(convert_date)
    df.to_csv(file, index=False)


In [11]:
# Preview the first rows of `df` to spot-check the converted dates.
# NOTE(review): `df` is whatever frame the kernel assigned last — presumably the
# final file from the date-conversion loop; confirm after a fresh Run All.
df.head()

Unnamed: 0,link,title,snippet,date,source
0,https://www.businesstimes.com.sg/companies-mar...,Mapletree Industrial Trust posts 1.5% rise in ...,Tan Nai Lun ... THE distribution per unit (DPU...,2025-02-19,The Business Times
1,https://drwealth.com/mapletree-industrial-drop...,Mapletree Industrial Drops 3% After JPMorgan D...,As if the current plight of REITs wasn't bad e...,2025-02-19,Dr Wealth
2,https://thesmartinvestor.com.sg/better-buy-map...,Better Buy: Mapletree Industrial Trust Vs Mapl...,The winner for this round is MIT as its sister...,2024-12-09,The Smart Investor
3,https://www.reitsweek.com/2025/03/mapletree-in...,Mapletree Industrial Trust sees SGD35.4 millio...,Mapletree Industrial Trust (MIT) was the top S...,2025-03-05,reitsweek
4,https://growbeansprout.com/weekly-market-revie...,Mapletree Industrial Trust and Frasers Logisti...,We share about Mapletree Industrial Trust and ...,2025-03-12,Beansprout


In [12]:
from transformers import pipeline
import pandas as pd


# Hugging Face checkpoint used to score the sentiment of news titles.
model_name = "ProsusAI/finbert"

# return_all_scores=True makes each prediction a list with one
# {'label', 'score'} dict per label; the scoring cells index [0] into it.
# NOTE(review): newer transformers releases reportedly deprecate
# return_all_scores in favour of top_k=None, which changes the result nesting;
# change it together with the scoring cells.
classifier = pipeline(
    task="text-classification",
    model=model_name,
    return_all_scores=True,
)

 




Device set to use cpu


In [None]:
# Trial run on a single ticker (ajbu) before scoring the other files.
df = pd.read_csv('ajbu_news_data.csv')

# Highest-scoring label and its score for every title, in row order.
sentiments, scores = [], []

for title in df['title']:
    # classifier() returns a one-element list for one input; [0] is the
    # list of per-label {'label', 'score'} dicts.
    result = classifier(title)[0]
    # Rank the labels from most to least likely and keep the winner.
    sorted_result = sorted(result, key=lambda entry: entry['score'], reverse=True)
    top = sorted_result[0]
    sentiments.append(top['label'])
    scores.append(top['score'])

# Attach both columns and overwrite the source file.
df = df.assign(sentiment=sentiments, sentiment_score=scores)
df.to_csv('ajbu_news_data.csv', index=False)

In [14]:
# Debug peek at the raw per-label scores of the last title classified.
# NOTE(review): relies on `result` left over from the scoring loop's final
# iteration; it is undefined if that cell has not been run.
print(result)

[{'label': 'positive', 'score': 0.0368482768535614}, {'label': 'negative', 'score': 0.019962040707468987}, {'label': 'neutral', 'score': 0.9431896209716797}]


In [15]:
# Preview the first rows of `df` to check the added sentiment columns.
# NOTE(review): `df` is whatever frame the kernel assigned last — presumably the
# ajbu frame from the scoring cell; confirm after a fresh Run All.
df.head()

Unnamed: 0,link,title,snippet,date,source,sentiment,sentiment_score
0,https://sbr.com.sg/news/keppel-dc-reit-obtains...,Keppel DC REIT obtains $72.8m loan facility,"Keppel DC REIT's subsidiary, Keppel DC REIT Fi...",2025-03-19,Singapore Business Review,neutral,0.517661
1,https://www.theedgesingapore.com/news/reits/ke...,Keppel DC REIT group obtains loan facility of ...,Keppel DC REIT group has obtained a loan facil...,2025-03-18,The Edge Singapore,neutral,0.652589
2,https://thesmartinvestor.com.sg/5-potential-si...,5 Potential Singapore Blue-Chip Stocks that Pa...,Looking for up-and-coming blue-chip stocks? He...,2025-03-14,The Smart Investor,neutral,0.938237
3,https://www.tipranks.com/news/company-announce...,Keppel DC REIT Secures EUR 50 Million Loan Fac...,Keppel DC REIT ( ($SG:AJBU) ) has shared an an...,2025-03-18,TipRanks,positive,0.758177
4,https://www.businesstimes.com.sg/companies-mar...,"Stocks to watch: Keppel DC Reit, Centurion, Co...",The accommodation assets operator issued a pro...,2025-02-19,The Business Times,neutral,0.945785


In [None]:
# Score the remaining tickers (ajbu has its own cell).

files = [
    'aw9u_news_data.csv',
    'c2pu_news_data.csv',
    'hmn_news_data.csv',
    'o5ru_news_data.csv',
    'me8u_news_data.csv',
]

for file in files:
    df = pd.read_csv(file)

    # Highest-scoring label and its score for every title, in row order.
    sentiments, scores = [], []

    for title in df['title']:
        # [0] unwraps the single prediction: a list of per-label score dicts.
        result = classifier(title)[0]
        # Rank the labels from most to least likely and keep the winner.
        sorted_result = sorted(result, key=lambda entry: entry['score'], reverse=True)
        top = sorted_result[0]
        sentiments.append(top['label'])
        scores.append(top['score'])

    # Attach both columns and overwrite the source file.
    df = df.assign(sentiment=sentiments, sentiment_score=scores)
    df.to_csv(file, index=False)