In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd

In [2]:
def export(title:str, content:str, fileName:str, DataFrame:pd.DataFrame) -> pd.DataFrame:
  """Save one scraped article as a text file and append it to the running table.

  Args:
    title: Article headline; written as the first line of the text file.
    content: Article body with paragraphs separated by newlines. Blank and
      whitespace-only lines are dropped before writing.
    fileName: Name of the output file inside
      ``../sectors-based-txt/industrials-malaysia/``. The site name is derived
      from it by removing ``.txt`` and every digit.
    DataFrame: Table of articles collected so far (columns ``Title``,
      ``Content``, ``Site-Name``). It is not modified.

  Returns:
    A new DataFrame holding the rows of ``DataFrame`` followed by one row for
    this article, with a fresh 0..n-1 index.

  Raises:
    OSError: If the output file cannot be opened for writing (for example when
      the target directory does not exist).
  """
  path = f'../sectors-based-txt/industrials-malaysia/{fileName}'

  # 'thestar2.txt' -> 'thestar': strip the extension, then any digits.
  siteName = fileName.replace('.txt', '')
  siteName = ''.join([i for i in siteName if not i.isdigit()])

  # Filter into a new sequence. Removing items from a list while iterating over
  # it skips the element after each removal, which left runs of blank lines
  # partially in place.
  content = '\n'.join(line for line in content.split('\n') if line.strip() != '')

  # Explicit UTF-8 so curly quotes and other non-ASCII characters are written
  # the same way regardless of the platform's default encoding.
  with open(path, 'w', encoding='utf-8') as f:
    f.write(title + '\n\n')
    f.write(content)

  new_data = pd.DataFrame({
    'Title': [title],
    'Content': [content],
    'Site-Name': [siteName]
  })

  return pd.concat([DataFrame, new_data], ignore_index=True)

def export_to_csv(df:pd.DataFrame) -> pd.DataFrame:
  """Write the collected articles to ``../../csvs/industrials-malaysia.csv``.

  Args:
    df: Table to write; the index is not included in the file.

  Returns:
    The same ``df`` object, unchanged. Returning it keeps a call written as
    ``df = export_to_csv(df)`` from replacing the table with ``None``.

  Raises:
    OSError: If the CSV file cannot be opened for writing.
  """
  df.to_csv('../../csvs/industrials-malaysia.csv', index=False)
  return df

In [3]:
# Empty accumulator table; every export() call below returns it with one more
# article row appended, and the result is bound back to `df`.
df = pd.DataFrame(columns=['Title', 'Content', 'Site-Name'])

In [4]:
# Browser identity sent with every request (a desktop Chrome user-agent string).
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'

# Chrome flags: run without a visible window and override the user agent.
options = webdriver.ChromeOptions()
for chrome_flag in ('--headless', f'user-agent={user_agent}'):
  options.add_argument(chrome_flag)

# One shared browser session, reused by every article cell below.
driver = webdriver.Chrome(options=options)

## Article # 1

In [5]:
# Article 1 (channelnewsasia.com): point the shared browser session at the page.
path = 'https://www.channelnewsasia.com/business/malaysia-allocate-us18-billion-fund-national-industrial-masterplan-3739831'
driver.get(path)

In [6]:
# Locate the headline <h1>; the element itself is kept because a later cell
# reads site_title.text when exporting.
site_title = driver.find_element(By.XPATH, '//h1[@class="h1 h1--page-title"]')
print(site_title.text)

Malaysia to allocate US$1.8 billion to fund national industrial masterplan


In [7]:
# Body paragraphs and sub-headings inside the "text-long" containers.
contentElement = driver.find_elements(
  By.XPATH,
  '//div[@class="text-long"]//p | //div[@class="text-long"]//h2',
)

# Read each node's text once, then drop the advertisement placeholders.
node_texts = [node.text for node in contentElement]
content = [text for text in node_texts if "ADVERTISEMENT" not in text]
print(content)

["KUALA LUMPUR: Malaysia's trade ministry said on Friday (Sep 1) that the government will allocate RM8.2 billion (US$1.77 billion) to fund a national industrial masterplan to be implemented by 2030.", 'The plan aims to develop a more robust manufacturing sector, by boosting the competitiveness of small and medium-sized businesses and through the creation of high-skilled jobs, the ministry said in a statement.', "Malaysia's Prime Minister Anwar Ibrahim said the plan would require an estimated total investment of RM95 billion, predominantly from the private sector.", '"This creation of high-value job opportunities is crucial to uplift and expand the middle-class society," Anwar said at the launch of the masterplan.', "The plan aims to increase the manufacturing sector's value by 6.5 per cent in seven years and projects employment growth in the sector at 2.3 per cent from 2023, Anwar said."]


In [8]:
# Write the article to its text file and append a row to the table.
# Re-running this cell appends the same article again.
df = export(
  title=site_title.text,
  content='\n'.join(content),
  fileName='channelnewsasia.txt',
  DataFrame=df,
)

## Article # 2

In [9]:
# Article 2 (thestar.com.my): point the shared browser session at the page.
path = 'https://www.thestar.com.my/business/business-news/2023/03/10/utilities-sector-continues-to-see-resilient-earnings'
driver.get(path)

In [10]:
# Headline is the <h1> directly under the story-page headline container.
site_title = driver.find_element(By.XPATH, '//div[@class="headline story-pg"]/h1')
print(site_title.text)

Utilities sector continues to see resilient earnings


In [11]:
# Paragraphs that are direct children of the story body.
contentElement = driver.find_elements(By.XPATH, '//div[@id="story-body"]/p')

# One text string per paragraph, in page order.
content = [paragraph.text for paragraph in contentElement]
print(content)

['KUALA LUMPUR: The utilities sector continues to be attractive for its earnings defensiveness, which is backed by resilient earnings from regulated assets, while recurring cash flows also anchor decent yields of 4% to 5%, according to Kenanga Research.', '“The fourth quarter of 2022 (4Q22) results season spoke eloquently yet again for earnings resilience of regulated assets while variances (both upside and downside) came largely from non-regulated assets,” said the research house.', 'Regarding 4Q22 results season, Kenanga Research said YTL Power International Bhd was the only outperformer with its first half ended Dec 31, 2022 (1H23) results beating its forecasts yet again.', '“This was due to stronger-than-expected performance of its Singapore independent power producers and consistently higher associate income, primarily from its 20% stake in PT Jawa Power, which owns a 1,220MW coal-fired power plant in East Java, Indonesia,” it added.', 'However, it said Tenaga Nasional Bhd’s (TNB)

In [12]:
# Write the article to its text file and append a row to the table.
# Re-running this cell appends the same article again.
df = export(
  title=site_title.text,
  content='\n'.join(content),
  fileName='thestar.txt',
  DataFrame=df,
)

## Article # 3

In [13]:
# Article 3 (freemalaysiatoday.com): point the shared browser session at the page.
path = 'https://www.freemalaysiatoday.com/category/business/2023/09/11/manufacturing-sector-to-remain-lacklustre-in-second-half-says-fmm/'
driver.get(path)

In [14]:
# The headline is addressed by its absolute position under #__next, so this
# locator depends on the exact page layout.
# Sample <h1> markup from this site (copied from a different article, so the
# headline text will not match this page):
# <h1 class="sc-fPXMVe iWDaXK pb-4 mb-0 fw-bold">Kenanga stays optimistic with telco sector following Jendela 1’s success</h1>
site_title = driver.find_element(
  By.XPATH,
  '//*[@id="__next"]/main/div[2]/div/div/div[1]/section/h1',
)
print(site_title.text)

Manufacturing sector to remain lacklustre in second half, says FMM


In [15]:
# Paragraphs that are direct children of the element marked itemprop="articleBody".
contentElement = driver.find_elements(By.XPATH, '//div[@itemprop="articleBody"]/p')

# One text string per paragraph, in page order.
content = [paragraph.text for paragraph in contentElement]
print(content)

['PETALING JAYA: The manufacturing sector in Malaysia has continued to slow down in the first half of 2023 (H1 FY2023), according to a survey by the Federation of Malaysian Manufacturers (FMM).', 'FMM said with the global economy tilted towards the downside, the outlook for the sector in the second half (H2 FY2023) remains cautious.', '“Looking ahead, the sector is likely in anticipation of the persistently weak external conditions and (waiting for) clearer domestic economic policies and directions from the government to help spur higher investments and Malaysia’s growth momentum,” it said.', 'This was gleaned from its FMM Business Conditions survey conducted from July 5 to Aug 18, which drew 351 respondents nationwide.', 'The survey tracked business confidence via the FMM Business Conditions Index (FMM BCI) and covered the actual performance in H1 FY2023 and the outlook for H2 FY2023.', 'The survey showed that all indicators had declined from the previous survey, except for production

In [16]:
# Write the article to its text file and append a row to the table.
# Re-running this cell appends the same article again.
df = export(
  title=site_title.text,
  content='\n'.join(content),
  fileName='freemalaysiatoday.txt',
  DataFrame=df,
)

## Article # 4

In [17]:
# Article 4 (themalaysianreserve.com): point the shared browser session at the page.
path = 'https://themalaysianreserve.com/2023/03/02/positive-response-from-transport-sector/'
driver.get(path)

In [18]:
# On this site the headline is the first <h2> found inside the single-post container.
site_title = driver.find_element(By.XPATH, '//div[@class="single-post"]//h2')
print(site_title.text)

Positive response from transport sector


In [19]:
# Every paragraph carrying the "p1" class, anywhere on the page.
contentElement = driver.find_elements(By.XPATH, '//p[@class="p1"]')

# One text string per paragraph, in page order.
content = [paragraph.text for paragraph in contentElement]
print(content)

['Most industry players and subject matter experts applaud Anwar’s budget for its fairness, transparency and comprehensiveness', 'by AUFA MARDHIAH', 'THE revised Budget 2023 tabled by Prime Minister (PM) Datuk Seri Anwar Ibrahim, who is also the finance minister, received a favourable response from the transport sector.', 'Most industry players and subject matter experts applauded Anwar’s budget for its fairness, transparency and comprehensiveness.', 'To hear further comments, The Malaysian Reserve (TMR) contacted several players and experts to get their views on the Malaysia Madani budget.', 'Universiti Putra Malaysia (UPM) head of the Road Safety Research Centre from the Faculty of Engineering Prof Dr Law Teik Hua was pleased with the allocation for the transport sector.', 'He said it is quite positive in the sense that it helps and contributes to solving many of the country’s transport problems, particularly the three to four items on public transport that the budget highlighted.', 

In [20]:
# Write the article to its text file and append a row to the table.
# Re-running this cell appends the same article again.
df = export(
  title=site_title.text,
  content='\n'.join(content),
  fileName='themalaysianreserve.txt',
  DataFrame=df,
)

In [21]:
# Persist the collected table. The helper is called only for its side effect:
# binding its result back to `df` would replace the table with whatever the
# helper returns (None when it has no return statement).
export_to_csv(df)

# Scraping is finished: close the headless Chrome session so the browser
# process does not outlive the notebook run.
driver.quit()