In [17]:
import pandas as pd
import numpy as np

In [35]:
def _text_or_na(wait, locator):
    """Return the text of the first element matching `locator`, or "N/A".

    `wait` is the WebDriverWait used to poll for the element and `locator`
    is a `(By.<strategy>, value)` tuple. Any ordinary failure (timeout,
    element gone before its text is read, ...) yields the "N/A" placeholder
    so one missing field does not abort the whole scrape.
    """
    try:
        return wait.until(EC.presence_of_element_located(locator)).text
    except Exception:
        # Deliberately best-effort, but `Exception` (rather than a bare
        # `except:`) lets KeyboardInterrupt / SystemExit propagate.
        return "N/A"


# -----------------------------
# 1. Open the article page
# -----------------------------
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Everything that talks to the browser lives inside try/finally so the Chrome
# process is always shut down, even when navigation or extraction raises.
try:
    driver.get("https://omannews.gov.om/topics/en/132/show/125517/ona")
    wait = WebDriverWait(driver, 15)  # wait up to 15 seconds per element

    # -----------------------------
    # 2. Extract the headline
    # -----------------------------
    # NOTE(review): the first <h1> on the page may not be the article title
    # (a saved output later in this notebook shows "NEWS" in the Headline
    # column) — confirm the selector against the live page.
    headline = _text_or_na(wait, (By.TAG_NAME, "h1"))

    # -----------------------------
    # 3. Extract the published time (XPath: p[1])
    # -----------------------------
    time_xpath = '//*[@id="contentWrapper"]/div[3]/div[1]/div/div/div[1]/article/div/p[1]'
    published_time = _text_or_na(wait, (By.XPATH, time_xpath))

    # -----------------------------
    # 4. Extract full article text (all <p> tags)
    # -----------------------------
    paragraphs = driver.find_elements(By.XPATH, '//*[@id="contentWrapper"]//article//p')
    # Read each paragraph's text once (every `.text` access is a round trip
    # to the browser), then drop the blank ones.
    paragraph_texts = [p.text for p in paragraphs]
    article_text = "\n".join(text for text in paragraph_texts if text.strip() != "")
finally:
    # -----------------------------
    # 5. Close browser
    # -----------------------------
    driver.quit()

# -----------------------------
# 6. Save to Excel
# -----------------------------
wb = Workbook()
ws = wb.active
ws.title = "ONA Article"

# Header row
ws.append(["Headline", "Published Time", "Article Text"])

# Data row
ws.append([headline, published_time, article_text.strip()])

file_name = "ona_article_p1_safe.xlsx"
wb.save(file_name)

print(f"Saved to {file_name}")

Saved to ona_article_p1_safe.xlsx


In [37]:
# Load the saved article workbook for inspection. `pd.read_excel` already
# returns a DataFrame, so it does not need to be re-wrapped in pd.DataFrame().
# NOTE(review): the scraping cell above writes "ona_article_p1_safe.xlsx",
# while this cell reads "ona_article_p1.xlsx" — confirm which file is meant;
# on a fresh kernel this read only works if that other file already exists.
df = pd.read_excel('ona_article_p1.xlsx')
df

Unnamed: 0,Headline,Published Time,Article Text
0,NEWS,Winners of Green Innovation Hackathon Announce...,Winners of Green Innovation Hackathon Announce...


In [46]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Listing page to scrape, and the hosts used to turn relative links absolute.
url = "https://rssfeeds.timesofoman.com/index.php/category/oman/"
IMAGE_BASE_URL = "https://shabiba.eu-central-1.linodeobjects.com"
ARTICLE_BASE_URL = "https://timesofoman.com"
REQUEST_TIMEOUT_S = 30  # without a timeout, requests.get can block forever


def _absolutize(link, base):
    """Return `link` as an absolute URL.

    - None / empty string: returned unchanged.
    - Protocol-relative ("//host/path"): prefixed with "https:".
    - Root-relative ("/path"): prefixed with `base`.
    - Anything else: returned unchanged.
    """
    if not link:
        return link
    if link.startswith("//"):
        return "https:" + link
    if link.startswith("/"):
        return base + link
    return link


def _parse_article(art):
    """Extract one result row (dict) from an <article class="post"> tag.

    Every field is optional: a missing tag or attribute produces None for
    that field instead of raising, so one malformed article cannot abort
    the whole scrape.
    """
    # ------------------ IMAGE EXTRACTION ------------------
    img_url = None
    img_alt = None
    img_tag = art.find("img")  # Locate the <img> tag inside the article
    if img_tag:
        # The image link may sit in a lazy-loading attribute instead of src.
        img_url = img_tag.get("src") or img_tag.get("data-src") or img_tag.get("data-lazy-src")
        img_alt = img_tag.get("data-alt") or img_tag.get("alt")  # Alternative text
        img_url = _absolutize(img_url, IMAGE_BASE_URL)

    # ------------------ DATE EXTRACTION ------------------
    date_tag = art.find("h3", class_="date")  # Find date container
    date_text = date_tag.get_text(strip=True) if date_tag else None

    # ------------------ TITLE + ARTICLE URL ------------------
    title = None
    article_url = None
    title_tag = art.find("h2", class_="post-title")  # Article title section
    link = title_tag.find("a") if title_tag else None
    if link:
        title = link.get_text(strip=True)
        # href may be absent; _absolutize passes None straight through.
        article_url = _absolutize(link.get("href"), ARTICLE_BASE_URL)

    # ------------------ SUMMARY TEXT ------------------
    summary = art.find("div", class_="post-text")  # Find summary container
    summary_text = summary.get_text(strip=True) if summary else None

    return {
        "Date": date_text,
        "Title": title,
        "Summary": summary_text,
        "Article_URL": article_url,
        "Image_URL": img_url,
        "Image_Alt": img_alt
    }


# Send GET request to the page; fail loudly on HTTP errors instead of
# silently parsing an error page into an empty spreadsheet.
response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()

# Parse HTML content
soup = BeautifulSoup(response.text, "html.parser")

# Find all article sections and extract one row per article
articles = soup.find_all("article", class_="post")
data = [_parse_article(art) for art in articles]

# Convert list to DataFrame
df = pd.DataFrame(data)

# Save results to Excel file
df.to_excel("oman_news_images.xlsx", index=False)

In [47]:
# Read back the workbook written by the scraping cell to check its contents.
# `pd.read_excel` already returns a DataFrame, so no pd.DataFrame() re-wrap.
df = pd.read_excel('oman_news_images.xlsx')
df

Unnamed: 0,Date,Title,Summary,Article_URL,Image_URL,Image_Alt
0,Sunday 16/November/2025 11:02 AM,Oman Airports becomes world’s first to launch ...,Muscat: Oman Airports has announced that it ha...,https://timesofoman.com/article/165086-oman-ai...,https://shabiba.eu-central-1.linodeobjects.com...,Oman Airports becomes world’s first to launch ...
1,Sunday 16/November/2025 10:28 AM,His Majesty confers Oman’s highest civilian ho...,Muscat: His Majesty Sultan Haitham bin Tarik c...,https://timesofoman.com/article/165083-his-maj...,https://shabiba.eu-central-1.linodeobjects.com...,His Majesty confers Oman’s highest civilian ho...
2,Saturday 15/November/2025 15:41 PM,Sayyid Badr receives phone call from Deputy Pr...,Muscat: Foreign Minister Sayyid Badr Hamad Al ...,https://timesofoman.com/article/165062-sayyid-...,https://shabiba.eu-central-1.linodeobjects.com...,Sayyid Badr receives phone call from Deputy Pr...
3,Saturday 15/November/2025 15:30 PM,Oman makes remarkable progress in digital field,Muscat: The Sultanate of Oman is making remark...,https://timesofoman.com/article/165061-oman-ma...,https://shabiba.eu-central-1.linodeobjects.com...,Oman makes remarkable progress in digital field
4,Saturday 15/November/2025 14:06 PM,Dhofar Governorate is moving forward under Ren...,Salalah: The Dhofar Governorate is moving forw...,https://timesofoman.com/article/165058-dhofar-...,https://shabiba.eu-central-1.linodeobjects.com...,Dhofar Governorate is moving forward under Ren...
5,Friday 14/November/2025 16:03 PM,Abundance of Winter crops at Tawi Al-Hara Mark...,Rustaq: The Tawi Al-Hara market in Rustaq Gove...,https://timesofoman.com/article/165043-abundan...,https://shabiba.eu-central-1.linodeobjects.com...,Abundance of Winter crops at Tawi Al-Hara Mark...
6,Friday 14/November/2025 13:07 PM,Environment Authority conducts study to analys...,Muscat: The Environment Authority has conducte...,https://timesofoman.com/article/165036-environ...,https://shabiba.eu-central-1.linodeobjects.com...,Environment Authority conducts study to analys...
7,Friday 14/November/2025 07:53 AM,Foreign Minister discusses regional developmen...,Muscat: Foreign Minister Sayyid Badr Albusaidi...,https://timesofoman.com/article/165030-foreign...,https://shabiba.eu-central-1.linodeobjects.com...,Foreign Minister discusses regional developmen...
8,Friday 14/November/2025 06:33 AM,Oman introduces cultural visa to facilitate kn...,Muscat: The Sultanate of Oman has introduced t...,https://timesofoman.com/article/165029-oman-in...,https://shabiba.eu-central-1.linodeobjects.com...,Oman introduces cultural visa to facilitate kn...
9,Thursday 13/November/2025 19:35 PM,Oman celebrates international day against illi...,Muscat: The Sultanate of Oman is joining the g...,https://timesofoman.com/article/165017-oman-ce...,https://shabiba.eu-central-1.linodeobjects.com...,Oman celebrates international day against illi...
