In [6]:
import requests
import json

# For ET Markets
def get_et_markets_news():
    url = "https://economictimes.indiatimes.com/markets/newslist/2419376.cms"
    response = requests.get(url)
    data = response.json()
    
    for item in data['news']:
        print(item['headline'])

# For Live Mint
def get_livemint_news():
    url = "https://www.livemint.com/ajaxmint/getmorestories"
    params = {
        'section': 'market',
        'pgno': '1',
        'channel': 'market'
    }
    response = requests.get(url, params=params)
    data = response.json()
    
    for item in data['stories']:
        print(item['headline'])

In [7]:
get_et_markets_news()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [8]:
import requests

def get_et_markets_news():
    url = "https://economictimes.indiatimes.com/markets/newslist/2419376.cms"
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept": "application/json",
        "Referer": "https://economictimes.indiatimes.com/"
    }
    
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raises exception for 4XX/5XX errors
        
        # Check if response is actually JSON
        if 'application/json' not in response.headers.get('Content-Type', ''):
            raise ValueError("Response is not JSON")
            
        data = response.json()
        
        for item in data.get('news', []):
            print(item.get('headline'))
            
    except Exception as e:
        print(f"Error: {e}")
        print(f"Response content: {response.text[:200]}...")  # Print first 200 chars

get_et_markets_news()

Error: Response is not JSON
Response content: <!DOCTYPE html><html xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:valurl="com.times.utilities.CMSWebUtility" xmlns:nohtml="com.til.utils.CommonUtils" xmlns:listval="com.indiatimes.cms.utilities.CM...


In [23]:
def scrape_et_tata_news(base_url, max_pages=10):  # ← set max pages
    news = []
    page = 1

    while page <= max_pages:
        url = base_url + ('' if page == 1 else f"?page={page}")
        print(f"Scraping: {url}")
        r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
        soup = BeautifulSoup(r.text, 'html.parser')

        items = soup.select('div.eachStory')
        if not items:
            print("No more news found.")
            break

        for item in items:
            title_elem = item.select_one('h3 a')
            if not title_elem:
                continue

            title = title_elem.text.strip()
            link = 'https://economictimes.indiatimes.com' + title_elem['href']
            date_elem = item.select_one('time')
            date = date_elem['datetime'] if date_elem and date_elem.has_attr('datetime') else (
                date_elem.text.strip() if date_elem else None)

            news.append({'title': title, 'url': link, 'date': date})

        page += 1
        time.sleep(1)

    return pd.DataFrame(news)

# Run with limit
df = scrape_et_tata_news("https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms", max_pages=20)
df.to_csv("et_tata_motors_news.csv", index=False)
print(f"✅ Total articles collected: {len(df)}")


Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=2
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=3
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=4
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=5
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=6
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=7
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=8
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/companyid-12934.cms?page=9
Scraping: https://economictimes.indiatimes.com/tata-motors-ltd/stocksupdate/comp

In [26]:
df

Unnamed: 0,title,url,date
0,"Tata Motors, Maruti shares advance up to 3% as...",https://economictimes.indiatimes.comhttps://ec...,"23 Jul, 2025, 12:39PM IST"
1,"Eternal tops Rs 3 lakh crore market cap, surpa...",https://economictimes.indiatimes.comhttps://ec...,"22 Jul, 2025, 12:13PM IST"
2,Stock market update: Stocks that hit 52-week h...,https://economictimes.indiatimes.comhttps://ec...,"22 Jul, 2025, 11:12AM IST"
3,Stock market update: Nifty Auto index falls 0....,https://economictimes.indiatimes.comhttps://ec...,"22 Jul, 2025, 10:14AM IST"
4,Stock market update: Nifty Auto index falls 0.04%,https://economictimes.indiatimes.comhttps://ec...,"17 Jul, 2025, 04:54PM IST"
...,...,...,...
535,Are short-term headwinds from China an opportu...,https://economictimes.indiatimes.comhttps://ec...,"27 Jun, 2025, 02:09PM IST"
536,"Sensex jumps over 250 pts, Nifty above 25,600;...",https://economictimes.indiatimes.comhttps://ec...,"27 Jun, 2025, 09:31AM IST"
537,Stock market update: Nifty Auto index falls 0....,https://economictimes.indiatimes.comhttps://ec...,"26 Jun, 2025, 11:20AM IST"
538,Tata Motors has unveiled the introductory pric...,https://economictimes.indiatimes.comhttps://ec...,"24 Jun, 2025, 09:18AM IST"


In [28]:
df.nunique()

title    27
url      27
date     27
dtype: int64