Textiles

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Setup parameters
base_url = "https://economictimes.indiatimes.com/industry/cons-products/garments-/-textiles"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
articles = []
page = 1
two_years_ago = datetime.now() - timedelta(days=365*2)

# Loop through paginated pages until no more articles are found or articles are older than 2 years.
while True:
    print(f"Processing page {page}...")
    url = f"{base_url}?curpg={page}"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to retrieve page, status code:", response.status_code)
        break

    soup = BeautifulSoup(response.content, "html.parser")
    # Find all article blocks; note that the class names might change.
    story_blocks = soup.find_all("div", class_="eachStory")
    if not story_blocks:
        print("No story blocks found on page, ending scraping.")
        break

    page_has_recent_article = False  # flag to check if any article on the page is within 2 years

    for story in story_blocks:
        # Extract headline and link; adjust selectors as needed.
        headline_tag = story.find("h3")
        if not headline_tag:
            continue
        headline = headline_tag.get_text(strip=True)
        link_tag = story.find("a")
        if not link_tag or not link_tag.get("href"):
            continue
        # Build the full URL (Economic Times links are typically relative)
        article_link = "https://economictimes.indiatimes.com" + link_tag.get("href")

        # Extract the published date from the time tag using its attributes.
        time_tag = story.find("time", {"class": "date-format"})
        article_date = None
        if time_tag and time_tag.get("data-time"):
            date_str = time_tag["data-time"]
            print(date_str)
            try:
                # Parse the full date-time string
                article_date = datetime.strptime(date_str, "%b %d, %Y, %I:%M %p IST")
            except Exception as e:
                print(f"Date parsing error for '{date_str}':", e)

        # If a date was found and it's older than 2 years, skip this article.
        if article_date and article_date.date() < two_years_ago.date():
            continue
        else:
            page_has_recent_article = True

        # Fetch the article page for content.
        article_response = requests.get(article_link, headers=headers)
        if article_response.status_code != 200:
            print(f"Failed to retrieve article: {article_link}")
            continue

        article_soup = BeautifulSoup(article_response.content, "html.parser")
        
        # Extract article content.
        content_div = article_soup.find("div", class_="artText")
        content = content_div.get_text(separator="\n", strip=True) if content_div else ""

        articles.append({
            "headline": headline,
            "link": article_link,
            "date": article_date.strftime("%Y-%m-%d") if article_date else "",
            "content": content
        })

        # Print both the article headline and date
        print(f"Scraped article: {headline}")
        print(f"Publication date: {article_date.strftime('%Y-%m-%d') if article_date else 'Date not found'}")
        print("-" * 80)
        
        time.sleep(1)  # Short delay between articles

    # If no recent articles were found on the current page, assume we have reached articles older than 2 years.
    if not page_has_recent_article:
        print("No recent articles found on this page. Ending scraping.")
        break

    page += 1
    time.sleep(2)  # Delay between page requests

# Save the collected data to a CSV file.
df = pd.DataFrame(articles)
output_filename = "et_energy_textile_articles.csv"
df.to_csv(output_filename, index=False)
print(f"Scraping complete! Data saved in '{output_filename}'.")

Processing page 1...
Mar 22, 2025, 12:00 AM IST
Scraped article: Bangladesh's textile industry feels regime change heat
Publication date: 2025-03-22
--------------------------------------------------------------------------------
Mar 19, 2025, 10:17 PM IST
Scraped article: Nawaz Modi Singhania quits Raymond board as director
Publication date: 2025-03-19
--------------------------------------------------------------------------------
Mar 4, 2025, 12:46 AM IST
Scraped article: Stars & stripes on demand: Indian exporters eye boost as US plans another 10% tariff on Chinese imports
Publication date: 2025-03-04
--------------------------------------------------------------------------------
Feb 28, 2025, 04:25 PM IST
Scraped article: SECL inks MoU with Apparel Training & Design Centre for skill training
Publication date: 2025-02-28
--------------------------------------------------------------------------------
Feb 27, 2025, 09:59 PM IST
Scraped article: From handloom to automated machines, 

Retail

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Setup parameters
base_url = "https://economictimes.indiatimes.com/industry/services/retail"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
articles = []
page = 1
two_years_ago = datetime.now() - timedelta(days=365*2)

# Loop through paginated pages until no more articles are found or articles are older than 2 years.
while True:
    print(f"Processing page {page}...")
    url = f"{base_url}?curpg={page}"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to retrieve page, status code:", response.status_code)
        break

    soup = BeautifulSoup(response.content, "html.parser")
    # Find all article blocks; note that the class names might change.
    story_blocks = soup.find_all("div", class_="eachStory")
    if not story_blocks:
        print("No story blocks found on page, ending scraping.")
        break

    page_has_recent_article = False  # flag to check if any article on the page is within 2 years

    for story in story_blocks:
        # Extract headline and link; adjust selectors as needed.
        headline_tag = story.find("h3")
        if not headline_tag:
            continue
        headline = headline_tag.get_text(strip=True)
        link_tag = story.find("a")
        if not link_tag or not link_tag.get("href"):
            continue
        # Build the full URL (Economic Times links are typically relative)
        article_link = "https://economictimes.indiatimes.com" + link_tag.get("href")

        # Extract the published date from the time tag using its attributes.
        time_tag = story.find("time", {"class": "date-format"})
        article_date = None
        if time_tag and time_tag.get("data-time"):
            date_str = time_tag["data-time"]
            print(date_str)
            try:
                # Parse the full date-time string
                article_date = datetime.strptime(date_str, "%b %d, %Y, %I:%M %p IST")
            except Exception as e:
                print(f"Date parsing error for '{date_str}':", e)

        # If a date was found and it's older than 2 years, skip this article.
        if article_date and article_date.date() < two_years_ago.date():
            continue
        else:
            page_has_recent_article = True

        # Fetch the article page for content.
        article_response = requests.get(article_link, headers=headers)
        if article_response.status_code != 200:
            print(f"Failed to retrieve article: {article_link}")
            continue

        article_soup = BeautifulSoup(article_response.content, "html.parser")
        
        # Extract article content.
        content_div = article_soup.find("div", class_="artText")
        content = content_div.get_text(separator="\n", strip=True) if content_div else ""

        articles.append({
            "headline": headline,
            "link": article_link,
            "date": article_date.strftime("%Y-%m-%d") if article_date else "",
            "content": content
        })

        # Print both the article headline and date
        print(f"Scraped article: {headline}")
        print(f"Publication date: {article_date.strftime('%Y-%m-%d') if article_date else 'Date not found'}")
        print("-" * 80)
        
        time.sleep(1)  # Short delay between articles

    # If no recent articles were found on the current page, assume we have reached articles older than 2 years.
    if not page_has_recent_article:
        print("No recent articles found on this page. Ending scraping.")
        break

    page += 1
    time.sleep(2)  # Delay between page requests

# Save the collected data to a CSV file.
df = pd.DataFrame(articles)
output_filename = "et_energy_retail_articles.csv"
df.to_csv(output_filename, index=False)
print(f"Scraping complete! Data saved in '{output_filename}'.")

Processing page 1...
Mar 28, 2025, 11:37 PM IST
Scraped article: Reliance Industries transfers 100% stake in projects & property arm to Reliance Retail
Publication date: 2025-03-28
--------------------------------------------------------------------------------
Mar 28, 2025, 04:26 PM IST
Scraped article: Indian consumer's dash from e to q: How far can it go?
Publication date: 2025-03-28
--------------------------------------------------------------------------------
Mar 27, 2025, 03:55 PM IST
Scraped article: Bureau of Indian Standards seizes products from Amazon, Flipkart warehouses as crackdown intensifies
Publication date: 2025-03-27
--------------------------------------------------------------------------------
Mar 26, 2025, 10:22 PM IST
Scraped article: Direct selling industry crosses Rs 22,000 crore: Report
Publication date: 2025-03-26
--------------------------------------------------------------------------------
Mar 29, 2025, 05:31 PM IST
Scraped article: Regulatory environme

Media

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Setup parameters
base_url = "https://economictimes.indiatimes.com/industry/media-/-entertainment/media"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
articles = []
page = 1
two_years_ago = datetime.now() - timedelta(days=365*2)

# Loop through paginated pages until no more articles are found or articles are older than 2 years.
while True:
    print(f"Processing page {page}...")
    url = f"{base_url}?curpg={page}"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to retrieve page, status code:", response.status_code)
        break

    soup = BeautifulSoup(response.content, "html.parser")
    # Find all article blocks; note that the class names might change.
    story_blocks = soup.find_all("div", class_="eachStory")
    if not story_blocks:
        print("No story blocks found on page, ending scraping.")
        break

    page_has_recent_article = False  # flag to check if any article on the page is within 2 years

    for story in story_blocks:
        # Extract headline and link; adjust selectors as needed.
        headline_tag = story.find("h3")
        if not headline_tag:
            continue
        headline = headline_tag.get_text(strip=True)
        link_tag = story.find("a")
        if not link_tag or not link_tag.get("href"):
            continue
        # Build the full URL (Economic Times links are typically relative)
        article_link = "https://economictimes.indiatimes.com" + link_tag.get("href")

        # Extract the published date from the time tag using its attributes.
        time_tag = story.find("time", {"class": "date-format"})
        article_date = None
        if time_tag and time_tag.get("data-time"):
            date_str = time_tag["data-time"]
            print(date_str)
            try:
                # Parse the full date-time string
                article_date = datetime.strptime(date_str, "%b %d, %Y, %I:%M %p IST")
            except Exception as e:
                print(f"Date parsing error for '{date_str}':", e)

        # If a date was found and it's older than 2 years, skip this article.
        if article_date and article_date.date() < two_years_ago.date():
            continue
        else:
            page_has_recent_article = True

        # Fetch the article page for content.
        article_response = requests.get(article_link, headers=headers)
        if article_response.status_code != 200:
            print(f"Failed to retrieve article: {article_link}")
            continue

        article_soup = BeautifulSoup(article_response.content, "html.parser")
        
        # Extract article content.
        content_div = article_soup.find("div", class_="artText")
        content = content_div.get_text(separator="\n", strip=True) if content_div else ""

        articles.append({
            "headline": headline,
            "link": article_link,
            "date": article_date.strftime("%Y-%m-%d") if article_date else "",
            "content": content
        })

        # Print both the article headline and date
        print(f"Scraped article: {headline}")
        print(f"Publication date: {article_date.strftime('%Y-%m-%d') if article_date else 'Date not found'}")
        print("-" * 80)
        
        time.sleep(1)  # Short delay between articles

    # If no recent articles were found on the current page, assume we have reached articles older than 2 years.
    if not page_has_recent_article:
        print("No recent articles found on this page. Ending scraping.")
        break

    page += 1
    time.sleep(2)  # Delay between page requests

# Save the collected data to a CSV file.
df = pd.DataFrame(articles)
output_filename = "et_energy_media_articles.csv"
df.to_csv(output_filename, index=False)
print(f"Scraping complete! Data saved in '{output_filename}'.")

Processing page 1...
Mar 30, 2025, 05:46 PM IST
Scraped article: Planetcast named technology partner by beIN Asia Pacific
Publication date: 2025-03-30
--------------------------------------------------------------------------------
Mar 29, 2025, 03:55 PM IST
Scraped article: Reliance-backed GTPL Hathway signs agreement with MIB to operate HITS platform
Publication date: 2025-03-29
--------------------------------------------------------------------------------
Mar 27, 2025, 10:09 PM IST
Scraped article: Digital media overtakes TV in 2024, contributes 32 pc to overall revenue: FICCI-EY report
Publication date: 2025-03-27
--------------------------------------------------------------------------------
Mar 27, 2025, 02:53 PM IST
Scraped article: Indian TV & digital media set to soar while your cheers for Dhoni & Kohli in IPL grow louder, Vaz says
Publication date: 2025-03-27
--------------------------------------------------------------------------------
Mar 27, 2025, 10:35 PM IST
Scraped

Economy Indicators

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Setup parameters
base_url = "https://economictimes.indiatimes.com/news/economy/indicators"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
articles = []
page = 1
two_years_ago = datetime.now() - timedelta(days=365*2)

# Loop through paginated pages until no more articles are found or articles are older than 2 years.
while True:
    print(f"Processing page {page}...")
    url = f"{base_url}?curpg={page}"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to retrieve page, status code:", response.status_code)
        break

    soup = BeautifulSoup(response.content, "html.parser")
    # Find all article blocks; note that the class names might change.
    story_blocks = soup.find_all("div", class_="eachStory")
    if not story_blocks:
        print("No story blocks found on page, ending scraping.")
        break

    page_has_recent_article = False  # flag to check if any article on the page is within 2 years

    for story in story_blocks:
        # Extract headline and link; adjust selectors as needed.
        headline_tag = story.find("h3")
        if not headline_tag:
            continue
        headline = headline_tag.get_text(strip=True)
        link_tag = story.find("a")
        if not link_tag or not link_tag.get("href"):
            continue
        # Build the full URL (Economic Times links are typically relative)
        article_link = "https://economictimes.indiatimes.com" + link_tag.get("href")

        # Extract the published date from the time tag using its attributes.
        time_tag = story.find("time", {"class": "date-format"})
        article_date = None
        if time_tag and time_tag.get("data-time"):
            date_str = time_tag["data-time"]
            print(date_str)
            try:
                # Parse the full date-time string
                article_date = datetime.strptime(date_str, "%b %d, %Y, %I:%M %p IST")
            except Exception as e:
                print(f"Date parsing error for '{date_str}':", e)

        # If a date was found and it's older than 2 years, skip this article.
        if article_date and article_date.date() < two_years_ago.date():
            continue
        else:
            page_has_recent_article = True

        # Fetch the article page for content.
        article_response = requests.get(article_link, headers=headers)
        if article_response.status_code != 200:
            print(f"Failed to retrieve article: {article_link}")
            continue

        article_soup = BeautifulSoup(article_response.content, "html.parser")
        
        # Extract article content.
        content_div = article_soup.find("div", class_="artText")
        content = content_div.get_text(separator="\n", strip=True) if content_div else ""

        articles.append({
            "headline": headline,
            "link": article_link,
            "date": article_date.strftime("%Y-%m-%d") if article_date else "",
            "content": content
        })

        # Print both the article headline and date
        print(f"Scraped article: {headline}")
        print(f"Publication date: {article_date.strftime('%Y-%m-%d') if article_date else 'Date not found'}")
        print("-" * 80)
        
        time.sleep(1)  # Short delay between articles

    # If no recent articles were found on the current page, assume we have reached articles older than 2 years.
    if not page_has_recent_article:
        print("No recent articles found on this page. Ending scraping.")
        break

    page += 1
    time.sleep(2)  # Delay between page requests

# Save the collected data to a CSV file.
df = pd.DataFrame(articles)
output_filename = "et_energy_economy-indicators_articles.csv"
df.to_csv(output_filename, index=False)
print(f"Scraping complete! Data saved in '{output_filename}'.")

Processing page 1...
Mar 31, 2025, 06:47 AM IST
Scraped article: India Inc wage bill swells up to 25% as many states hike minimum pay
Publication date: 2025-03-31
--------------------------------------------------------------------------------
Mar 30, 2025, 11:47 AM IST
Scraped article: India’s economy projected to grow at 6.5% in FY26: EY report
Publication date: 2025-03-30
--------------------------------------------------------------------------------
Mar 30, 2025, 11:19 AM IST
Scraped article: RBI to shift focus on growth over inflation in April MPC meet, cut repo rate by 25 bps: Care Edge
Publication date: 2025-03-30
--------------------------------------------------------------------------------
Mar 30, 2025, 04:00 AM IST
Scraped article: Global risks won’t abate, need to be more purpose-oriented: Finance secretary Ajay Seth
Publication date: 2025-03-30
--------------------------------------------------------------------------------
Mar 29, 2025, 01:01 AM IST
Scraped article: Fis

Tech

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Setup parameters
base_url = "https://economictimes.indiatimes.com/small-biz/it/technology/articlelist/47280835.cms"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
articles = []
page = 1
two_years_ago = datetime.now() - timedelta(days=365*2)

# Loop through paginated pages until no more articles are found or articles are older than 2 years.
while True:
    print(f"Processing page {page}...")
    url = f"{base_url}?curpg={page}"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to retrieve page, status code:", response.status_code)
        break

    soup = BeautifulSoup(response.content, "html.parser")
    # Find all article blocks; note that the class names might change.
    story_blocks = soup.find_all("div", class_="eachStory")
    if not story_blocks:
        print("No story blocks found on page, ending scraping.")
        break

    page_has_recent_article = False  # flag to check if any article on the page is within 2 years

    for story in story_blocks:
        # Extract headline and link; adjust selectors as needed.
        headline_tag = story.find("h3")
        if not headline_tag:
            continue
        headline = headline_tag.get_text(strip=True)
        link_tag = story.find("a")
        if not link_tag or not link_tag.get("href"):
            continue
        # Build the full URL (Economic Times links are typically relative)
        article_link = "https://economictimes.indiatimes.com" + link_tag.get("href")

        # Extract the published date from the time tag using its attributes.
        time_tag = story.find("time", {"class": "date-format"})
        article_date = None
        if time_tag and time_tag.get("data-time"):
            date_str = time_tag["data-time"]
            print(date_str)
            try:
                # Parse the full date-time string
                article_date = datetime.strptime(date_str, "%b %d, %Y, %I:%M %p IST")
            except Exception as e:
                print(f"Date parsing error for '{date_str}':", e)

        # If a date was found and it's older than 2 years, skip this article.
        if article_date and article_date.date() < two_years_ago.date():
            continue
        else:
            page_has_recent_article = True

        # Fetch the article page for content.
        article_response = requests.get(article_link, headers=headers)
        if article_response.status_code != 200:
            print(f"Failed to retrieve article: {article_link}")
            continue

        article_soup = BeautifulSoup(article_response.content, "html.parser")
        
        # Extract article content.
        content_div = article_soup.find("div", class_="artText")
        content = content_div.get_text(separator="\n", strip=True) if content_div else ""

        articles.append({
            "headline": headline,
            "link": article_link,
            "date": article_date.strftime("%Y-%m-%d") if article_date else "",
            "content": content
        })

        # Print both the article headline and date
        print(f"Scraped article: {headline}")
        print(f"Publication date: {article_date.strftime('%Y-%m-%d') if article_date else 'Date not found'}")
        print("-" * 80)
        
        time.sleep(1)  # Short delay between articles

    # If no recent articles were found on the current page, assume we have reached articles older than 2 years.
    if not page_has_recent_article:
        print("No recent articles found on this page. Ending scraping.")
        break

    page += 1
    time.sleep(2)  # Delay between page requests

# Save the collected data to a CSV file.
df = pd.DataFrame(articles)
output_filename = "et_energy_tech_articles.csv"
df.to_csv(output_filename, index=False)
print(f"Scraping complete! Data saved in '{output_filename}'.")

Processing page 1...
Feb 27, 2025, 02:52 PM IST
Scraped article: FanTV raises $3 million to transform AI-powered content creation
Publication date: 2025-02-27
--------------------------------------------------------------------------------
Dec 20, 2024, 12:32 PM IST
Scraped article: AI won’t lead to a Terminator-style showdown: Innover’s Shishir Saxena
Publication date: 2024-12-20
--------------------------------------------------------------------------------
Dec 7, 2024, 10:59 AM IST
Scraped article: Time to be cautious: Unveiling dark patterns and impact on customers
Publication date: 2024-12-07
--------------------------------------------------------------------------------
Nov 28, 2024, 01:58 PM IST
Scraped article: Supervity partners with EvolutIA to transform autonomous enterprises with multi-agent AI architecture
Publication date: 2024-11-28
--------------------------------------------------------------------------------
Nov 27, 2024, 12:21 PM IST
Scraped article: Tech for real

Trends

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Setup parameters
base_url = "https://economictimes.indiatimes.com/news/company/corporate-trends"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
articles = []
page = 1
two_years_ago = datetime.now() - timedelta(days=365*2)

# Loop through paginated pages until no more articles are found or articles are older than 2 years.
while True:
    print(f"Processing page {page}...")
    url = f"{base_url}?curpg={page}"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to retrieve page, status code:", response.status_code)
        break

    soup = BeautifulSoup(response.content, "html.parser")
    # Find all article blocks; note that the class names might change.
    story_blocks = soup.find_all("div", class_="eachStory")
    if not story_blocks:
        print("No story blocks found on page, ending scraping.")
        break

    page_has_recent_article = False  # flag to check if any article on the page is within 2 years

    for story in story_blocks:
        # Extract headline and link; adjust selectors as needed.
        headline_tag = story.find("h3")
        if not headline_tag:
            continue
        headline = headline_tag.get_text(strip=True)
        link_tag = story.find("a")
        if not link_tag or not link_tag.get("href"):
            continue
        # Build the full URL (Economic Times links are typically relative)
        article_link = "https://economictimes.indiatimes.com" + link_tag.get("href")

        # Extract the published date from the time tag using its attributes.
        time_tag = story.find("time", {"class": "date-format"})
        article_date = None
        if time_tag and time_tag.get("data-time"):
            date_str = time_tag["data-time"]
            print(date_str)
            try:
                # Parse the full date-time string
                article_date = datetime.strptime(date_str, "%b %d, %Y, %I:%M %p IST")
            except Exception as e:
                print(f"Date parsing error for '{date_str}':", e)

        # If a date was found and it's older than 2 years, skip this article.
        if article_date and article_date.date() < two_years_ago.date():
            continue
        else:
            page_has_recent_article = True

        # Fetch the article page for content.
        article_response = requests.get(article_link, headers=headers)
        if article_response.status_code != 200:
            print(f"Failed to retrieve article: {article_link}")
            continue

        article_soup = BeautifulSoup(article_response.content, "html.parser")
        
        # Extract article content.
        content_div = article_soup.find("div", class_="artText")
        content = content_div.get_text(separator="\n", strip=True) if content_div else ""

        articles.append({
            "headline": headline,
            "link": article_link,
            "date": article_date.strftime("%Y-%m-%d") if article_date else "",
            "content": content
        })

        # Print both the article headline and date
        print(f"Scraped article: {headline}")
        print(f"Publication date: {article_date.strftime('%Y-%m-%d') if article_date else 'Date not found'}")
        print("-" * 80)
        
        time.sleep(1)  # Short delay between articles

    # If no recent articles were found on the current page, assume we have reached articles older than 2 years.
    if not page_has_recent_article:
        print("No recent articles found on this page. Ending scraping.")
        break

    page += 1
    time.sleep(2)  # Delay between page requests

# Save the collected data to a CSV file.
df = pd.DataFrame(articles)
output_filename = "et_energy_trends_articles.csv"
df.to_csv(output_filename, index=False)
print(f"Scraping complete! Data saved in '{output_filename}'.")

Processing page 1...
Apr 1, 2025, 12:23 AM IST
Scraped article: Leo Puri steps down from Tata Sons board
Publication date: 2025-04-01
--------------------------------------------------------------------------------
Mar 31, 2025, 11:37 PM IST
Scraped article: Bajaj Hindusthan Sugar close to default: Care Ratings
Publication date: 2025-03-31
--------------------------------------------------------------------------------
Mar 31, 2025, 02:04 PM IST
Scraped article: 86% of Indian CEOs prioritise investments in emerging technology: EY survey
Publication date: 2025-03-31
--------------------------------------------------------------------------------
Mar 31, 2025, 02:00 PM IST
Scraped article: Bosch gets demand notice of over Rs 20 cr from Income Tax dept
Publication date: 2025-03-31
--------------------------------------------------------------------------------
Mar 31, 2025, 12:29 PM IST
Scraped article: India may redraw PLI playbook with 360-degree approach for champion sectors
Publicatio