# Importing Libraries 
### Headlines are scraped from the online news website "https://indianexpress.com/"
#### The resulting DataFrame has two columns: News_Headline and Category

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Testing For Single Web Page

In [2]:
# Section listing page used to prototype the scraper on a single URL
url = "https://indianexpress.com/section/india/"
# Always pass a timeout: without one, requests waits indefinitely on a stalled server
response = requests.get(url, timeout=30)
html_content = response.text


In [3]:
# Check the status of the response whose body was stored in `html_content`,
# rather than downloading the page a second time and checking that copy
if response.status_code == 200:
    print("Successfully fetched the webpage!")
else:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")

Successfully fetched the webpage!


In [4]:
# Build the BeautifulSoup tree from the downloaded markup using Python's built-in HTML parser
soup = BeautifulSoup(markup=html_content, features='html.parser')

In [5]:
# Pretty-printed (indented) string form of the parsed document; no later cell
# shown here reads it, so it is presumably kept for manual inspection only
formatted_html = soup.prettify()

In [6]:
# Preview the trimmed text of every <h2> heading on the page, one per line
for heading in soup.find_all('h2'):
    headline_text = heading.text.strip()
    print(headline_text)

Modi government believes in democratising technology: IT Minister Ashwini Vaishnaw in Lok Sabha
Parliament Winter Session LIVE Updates: Uproar in both Houses of Parliament amid Centre-Oppn faceoff over Dhankhar removal, Cong-Soros link
Remove Nishikant Dubey’s defamatory, unparliamentary remarks from record: Congress to Om Birla
‘Happy I’m an Indian now’: Pakistan-born Christian man becomes 2nd Goan resident to get citizenship under CAA
‘Unable to pay school fees, scooter EMIs’: Odisha Mission Shakti workers demand release of remuneration
India evacuates 75 of its citizens as rebels take over Syria
Central forces to be deployed in Arunachal villages as strategic hydel project faces continued resistance
8-year-old girl dies of ‘food poisoning’, 35 students of Chhattisgarh residential school fall ill
CJI Khanna underlines need for reforms to promote compassionate, humane justice
Two new buildings in Central Vista won’t have Patel’s design stamp
No-trust move Opposition bid to divert from

In [7]:
# Gather the whitespace-trimmed text of every <h2> tag into a list of headlines
News_statement = [heading.text.strip() for heading in soup.find_all('h2')]

In [8]:
News_statement

['Modi government believes in democratising technology: IT Minister Ashwini Vaishnaw in Lok Sabha',
 'Parliament Winter Session LIVE Updates: Uproar in both Houses of Parliament amid Centre-Oppn faceoff over Dhankhar removal, Cong-Soros link',
 'Remove Nishikant Dubey’s defamatory, unparliamentary remarks from record: Congress to Om Birla',
 '‘Happy I’m an Indian now’: Pakistan-born Christian man becomes 2nd Goan resident to get citizenship under CAA',
 '‘Unable to pay school fees, scooter EMIs’: Odisha Mission Shakti workers demand release of remuneration',
 'India evacuates 75 of its citizens as rebels take over Syria',
 'Central forces to be deployed in Arunachal villages as strategic hydel project faces continued resistance',
 '8-year-old girl dies of ‘food poisoning’, 35 students of Chhattisgarh residential school fall ill',
 'CJI Khanna underlines need for reforms to promote compassionate, humane justice',
 'Two new buildings in Central Vista won’t have Patel’s design stamp',
 'N

In [9]:
# Build one 'India' label per scraped headline so both lists have equal length
News_Category = ['India' for _ in News_statement]

In [10]:
News_Category

['India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India',
 'India']

# A function that takes two arguments, a list of links to scrape and a news category, and performs all of the above operations for multiple links

In [11]:
def fetch_news_and_category(urls, category, *, tag='h2', timeout=30):
    """Scrape headline text from several pages and label each with a category.

    Parameters
    ----------
    urls : iterable of str
        Page URLs to download. Surrounding whitespace is ignored and blank
        entries (e.g. produced by a trailing comma) are skipped.
    category : str
        Label stored in the ``Category`` column for every headline found.
    tag : str, optional
        Name of the HTML tag that holds the headlines (default ``'h2'``).
    timeout : float, optional
        Seconds to wait for each request before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per matching tag, with columns ``News_Headline`` and
        ``Category``. The frame is empty when nothing could be fetched.

    Notes
    -----
    A page that cannot be downloaded (network error or non-200 status) is
    reported with ``print`` and skipped, so one bad URL does not discard the
    headlines already collected from the others.
    """
    headlines = []

    for raw_url in urls:
        url = raw_url.strip()
        if not url:
            # Nothing to fetch for an empty entry
            continue

        try:
            # Without a timeout a stalled server would block the loop forever
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to fetch the webpage from {url}. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch the webpage from {url}. Status code: {response.status_code}")
            continue
        print(f"Successfully fetched the webpage from {url}!")

        # Parse the HTML and keep the trimmed text of every matching tag
        soup = BeautifulSoup(response.text, 'html.parser')
        headlines.extend(node.text.strip() for node in soup.find_all(tag))

    # Build the category column from the headline count so the two columns
    # always have the same length
    return pd.DataFrame({
        'News_Headline': headlines,
        'Category': [category] * len(headlines),
    })

# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape every page and tag each headline with the chosen category
news_df = fetch_news_and_category(urls_input, category_input)

# Print the resulting DataFrame
if news_df is not None:
    print(news_df)

Enter multiple URLs (comma-separated):  https://indianexpress.com/section/india/,https://indianexpress.com/section/india/page/2/,https://indianexpress.com/section/india/page/3/,https://indianexpress.com/section/india/page/4/,https://indianexpress.com/section/india/page/5/,https://indianexpress.com/section/india/page/6/,https://indianexpress.com/section/india/page/7/,https://indianexpress.com/section/india/page/8/,https://indianexpress.com/section/india/page/9/,https://indianexpress.com/section/india/page/10/,https://indianexpress.com/section/india/page/11/,https://indianexpress.com/section/india/page/12/,https://indianexpress.com/section/india/page/13/,https://indianexpress.com/section/india/page/14/,https://indianexpress.com/section/india/page/15/,https://indianexpress.com/section/india/page/16/,https://indianexpress.com/section/india/page/17/,https://indianexpress.com/section/india/page/18/,https://indianexpress.com/section/india/page/19/,https://indianexpress.com/section/india/page/

Successfully fetched the webpage from https://indianexpress.com/section/india/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/10/!
Successfully fetched the webpage from https://indianexpress.com/section/india/page/11/!
Successfully fetched the webpage from https://in

In [17]:
news_df.shape

(750, 2)

## Function Call For Political News

In [18]:
# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# NOTE: despite its name, `sports` receives the political-pulse headlines in the
# recorded run; the name is kept because the cell building `dataframe_list` uses it
sports = fetch_news_and_category(urls_input, category_input)


Enter multiple URLs (comma-separated):  https://indianexpress.com/section/political-pulse/,https://indianexpress.com/section/political-pulse/page/2/,https://indianexpress.com/section/political-pulse/page/3/,https://indianexpress.com/section/political-pulse/page/4/,https://indianexpress.com/section/political-pulse/page/5/,https://indianexpress.com/section/political-pulse/page/6/,https://indianexpress.com/section/political-pulse/page/7/,https://indianexpress.com/section/political-pulse/page/8/,https://indianexpress.com/section/political-pulse/page/9/,https://indianexpress.com/section/political-pulse/page/10/,https://indianexpress.com/section/political-pulse/page/11/,https://indianexpress.com/section/political-pulse/page/12/,https://indianexpress.com/section/political-pulse/page/13/,https://indianexpress.com/section/political-pulse/page/14/,https://indianexpress.com/section/political-pulse/page/15/,https://indianexpress.com/section/political-pulse/page/16/,https://indianexpress.com/sectio

Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/political-pulse/page/10/!
Successfully fetched the webpage fro

In [19]:
sports.head()

Unnamed: 0,News_Headline,Category
0,DMK ally in choppy waters as gamble on ‘lotter...,political
1,"S M Krishna’s journey, from ‘Texas Gowda’ to ‘...",political
2,A bank election under Central forces cover: Be...,political
3,"Why gap between votes secured, seats won in Ma...",political
4,Today in Politics: Maharashtra Cabinet expansi...,political


In [20]:
sports.shape

(750, 2)

## Function call for Entertainment News

In [21]:
# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape the entertainment pages and tag each headline with the chosen category
Entertainment = fetch_news_and_category(urls_input, category_input)

Enter multiple URLs (comma-separated):  https://indianexpress.com/section/entertainment/,https://indianexpress.com/section/entertainment/page/2/,https://indianexpress.com/section/entertainment/page/3/,https://indianexpress.com/section/entertainment/page/4/,https://indianexpress.com/section/entertainment/page/5/,https://indianexpress.com/section/entertainment/page/6/,https://indianexpress.com/section/entertainment/page/7/,https://indianexpress.com/section/entertainment/page/8/,https://indianexpress.com/section/entertainment/page/9/,https://indianexpress.com/section/entertainment/page/10/,https://indianexpress.com/section/entertainment/page/11/,https://indianexpress.com/section/entertainment/page/12/,https://indianexpress.com/section/entertainment/page/13/,https://indianexpress.com/section/entertainment/page/14/,https://indianexpress.com/section/entertainment/page/15/,https://indianexpress.com/section/entertainment/page/16/,https://indianexpress.com/section/entertainment/page/17/,https:/

Successfully fetched the webpage from https://indianexpress.com/section/entertainment/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/entertainment/page/10/!
Successfully fetched the webpage from https://indianexpr

In [22]:
Entertainment.head()

Unnamed: 0,News_Headline,Category
0,Stree 2 actor Mushtaq Khan kidnapped days afte...,entertainment
1,Shabana Azmi recalls shooting ‘awkward’ intima...,entertainment
2,Ranveer Singh’s mother Anju donates hair as De...,entertainment
3,Exclusive | Sunny Deol confirms working in Nit...,entertainment
4,Pushpa 2 worldwide box office collection Day 5...,entertainment


## For Business Related News

In [23]:
# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape the business pages and tag each headline with the chosen category
Business = fetch_news_and_category(urls_input, category_input)

Enter multiple URLs (comma-separated):  https://indianexpress.com/section/business/,https://indianexpress.com/section/business/page/2/,https://indianexpress.com/section/business/page/3/,https://indianexpress.com/section/business/page/4/,https://indianexpress.com/section/business/page/5/,https://indianexpress.com/section/business/page/6/,https://indianexpress.com/section/business/page/7/,https://indianexpress.com/section/business/page/8/,https://indianexpress.com/section/business/page/9/,https://indianexpress.com/section/business/page/10/,https://indianexpress.com/section/business/page/11/,https://indianexpress.com/section/business/page/12/,https://indianexpress.com/section/business/page/13/,https://indianexpress.com/section/business/page/14/,https://indianexpress.com/section/business/page/15/,https://indianexpress.com/section/business/page/16/,https://indianexpress.com/section/business/page/17/,https://indianexpress.com/section/business/page/18/,https://indianexpress.com/section/busine

Successfully fetched the webpage from https://indianexpress.com/section/business/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/10/!
Successfully fetched the webpage from https://indianexpress.com/section/business/page/11/!
Successfully fe

In [24]:
Business.head()

Unnamed: 0,News_Headline,Category
0,One MobiKwik Systems IPO fully subscribed,business
1,"Gold Rate Today, 11 December: Gold prices touc...",business
2,Industry should realign itself keeping politic...,business
3,RBI new Governor Sanjay Malhotra’s first agend...,business
4,"Sensex, Nifty swing between high, lows in vola...",business


## For Sports

In [25]:
# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape the sports pages; stored as `sports_1` because `sports` is already
# taken by an earlier cell
sports_1 = fetch_news_and_category(urls_input, category_input)

Enter multiple URLs (comma-separated):  https://indianexpress.com/section/sports/,https://indianexpress.com/section/sports/page/2/,https://indianexpress.com/section/sports/page/3/,https://indianexpress.com/section/sports/page/4/,https://indianexpress.com/section/sports/page/5/,https://indianexpress.com/section/sports/page/6/,https://indianexpress.com/section/sports/page/7/,https://indianexpress.com/section/sports/page/8/,https://indianexpress.com/section/sports/page/9/,https://indianexpress.com/section/sports/page/10/,https://indianexpress.com/section/sports/page/11/,https://indianexpress.com/section/sports/page/12/,https://indianexpress.com/section/sports/page/13/,https://indianexpress.com/section/sports/page/14/,https://indianexpress.com/section/sports/page/15/,https://indianexpress.com/section/sports/page/16/,https://indianexpress.com/section/sports/page/17/,https://indianexpress.com/section/sports/page/18/,https://indianexpress.com/section/sports/page/19/,https://indianexpress.com/

Successfully fetched the webpage from https://indianexpress.com/section/sports/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/10/!
Successfully fetched the webpage from https://indianexpress.com/section/sports/page/11/!
Successfully fetched the webpage from

In [26]:
sports_1.head()

Unnamed: 0,News_Headline,Category
0,India Women vs Australia Women 3rd ODI LIVE Ma...,sports
1,"Treesa-Gayatri show sturdy defense, but run ou...",sports
2,The Hundred: How a prospective sponsorship dea...,sports
3,Google’s Year in Search: Hardik Pandya and Pun...,sports
4,What happens if Gukesh vs Ding Liren World Che...,sports


In [27]:
sports_1.shape

(750, 2)

In [28]:
# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape the cities pages and tag each headline with the chosen category
city = fetch_news_and_category(urls_input, category_input)

Enter multiple URLs (comma-separated):  https://indianexpress.com/section/cities/,https://indianexpress.com/section/cities/page/2/,https://indianexpress.com/section/cities/page/3/,https://indianexpress.com/section/cities/page/4/,https://indianexpress.com/section/cities/page/5/,https://indianexpress.com/section/cities/page/6/,https://indianexpress.com/section/cities/page/7/,https://indianexpress.com/section/cities/page/8/,https://indianexpress.com/section/cities/page/9/,https://indianexpress.com/section/cities/page/10/,https://indianexpress.com/section/cities/page/11/,https://indianexpress.com/section/cities/page/12/,https://indianexpress.com/section/cities/page/13/,https://indianexpress.com/section/cities/page/14/,https://indianexpress.com/section/cities/page/15/,https://indianexpress.com/section/cities/page/16/,https://indianexpress.com/section/cities/page/17/,https://indianexpress.com/section/cities/page/18/,https://indianexpress.com/section/cities/page/19/,https://indianexpress.com/

Successfully fetched the webpage from https://indianexpress.com/section/cities/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/10/!
Successfully fetched the webpage from https://indianexpress.com/section/cities/page/11/!
Successfully fetched the webpage from

In [29]:
city.head()

Unnamed: 0,News_Headline,Category
0,"Uproar in Parliament over George Soros, no-tru...",city
1,Former Maharashtra DGP Sanjay Pandey appears b...,city
2,Maharashtra medical university to send MBBS qu...,city
3,Pune airport sets new record with 204 flight m...,city
4,"After setback in Assembly bypolls, Congress ge...",city


In [30]:
# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape the lifestyle pages and tag each headline with the chosen category
lifestyle = fetch_news_and_category(urls_input, category_input)

Enter multiple URLs (comma-separated):  https://indianexpress.com/section/lifestyle/,https://indianexpress.com/section/lifestyle/page/2/,https://indianexpress.com/section/lifestyle/page/3/,https://indianexpress.com/section/lifestyle/page/4/,https://indianexpress.com/section/lifestyle/page/5/,https://indianexpress.com/section/lifestyle/page/6/,https://indianexpress.com/section/lifestyle/page/7/,https://indianexpress.com/section/lifestyle/page/8/,https://indianexpress.com/section/lifestyle/page/9/,https://indianexpress.com/section/lifestyle/page/10/,https://indianexpress.com/section/lifestyle/page/11/,https://indianexpress.com/section/lifestyle/page/12/,https://indianexpress.com/section/lifestyle/page/13/,https://indianexpress.com/section/lifestyle/page/14/,https://indianexpress.com/section/lifestyle/page/15/,https://indianexpress.com/section/lifestyle/page/16/,https://indianexpress.com/section/lifestyle/page/17/,https://indianexpress.com/section/lifestyle/page/18/,https://indianexpress.

Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/10/!
Successfully fetched the webpage from https://indianexpress.com/section/lifestyle/page/11/!
Succ

In [31]:
lifestyle.head()

Unnamed: 0,News_Headline,Category
0,Indian cuisine declared 12th best in the world...,lifestyle
1,‘Ek hi baar kaafi hai’: Rekha shares her take ...,lifestyle
2,Can having one banana a day keep the doctor away?,lifestyle
3,Mark Zuckerberg’s watch — thinner than two sta...,lifestyle
4,People who are good at reading have different ...,lifestyle


## Function change for some webpages (headlines are in `<h3>` tags instead of `<h2>`)

In [36]:
def fetch_news_and_category_h3(urls, category, *, timeout=30):
    """Scrape <h3> headline text from several pages and label it with a category.

    Variant of ``fetch_news_and_category`` for pages whose headlines are
    placed in ``<h3>`` tags rather than ``<h2>``.

    Parameters
    ----------
    urls : iterable of str
        Page URLs to download. Surrounding whitespace is ignored and blank
        entries (e.g. produced by a trailing comma) are skipped.
    category : str
        Label stored in the ``Category`` column for every headline found.
    timeout : float, optional
        Seconds to wait for each request before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per ``<h3>`` tag, with columns ``News_Headline`` and
        ``Category``. The frame is empty when nothing could be fetched.

    Notes
    -----
    A page that cannot be downloaded (network error or non-200 status) is
    reported with ``print`` and skipped, so one bad URL does not discard the
    headlines already collected from the others.
    """
    headlines = []

    for raw_url in urls:
        url = raw_url.strip()
        if not url:
            # Nothing to fetch for an empty entry
            continue

        try:
            # Without a timeout a stalled server would block the loop forever
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to fetch the webpage from {url}. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch the webpage from {url}. Status code: {response.status_code}")
            continue
        print(f"Successfully fetched the webpage from {url}!")

        # Parse the HTML and keep the trimmed text of every <h3> tag
        soup = BeautifulSoup(response.text, 'html.parser')
        headlines.extend(node.text.strip() for node in soup.find_all('h3'))

    # Build the category column from the headline count so the two columns
    # always have the same length
    return pd.DataFrame({
        'News_Headline': headlines,
        'Category': [category] * len(headlines),
    })

# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape the pages using the <h3>-based variant
news_df_h3 = fetch_news_and_category_h3(urls_input, category_input)

# Print the resulting DataFrame; the guard must test the frame produced by
# this cell, not the earlier `news_df`
if news_df_h3 is not None:
    print(news_df_h3)

Enter multiple URLs (comma-separated):  https://indianexpress.com/section/world/,https://indianexpress.com/section/world/page/2/,https://indianexpress.com/section/world/page/3/,https://indianexpress.com/section/world/page/4/,https://indianexpress.com/section/world/page/5/,https://indianexpress.com/section/world/page/6/,https://indianexpress.com/section/world/page/7/,https://indianexpress.com/section/world/page/8/,https://indianexpress.com/section/world/page/9/,https://indianexpress.com/section/world/page/10/,https://indianexpress.com/section/world/page/11/,https://indianexpress.com/section/world/page/12/,https://indianexpress.com/section/world/page/13/,https://indianexpress.com/section/world/page/14/,https://indianexpress.com/section/world/page/15/,https://indianexpress.com/section/world/page/16/,https://indianexpress.com/section/world/page/17/,https://indianexpress.com/section/world/page/18/,https://indianexpress.com/section/world/page/19/,https://indianexpress.com/section/world/page/

Successfully fetched the webpage from https://indianexpress.com/section/world/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/10/!
Successfully fetched the webpage from https://indianexpress.com/section/world/page/11/!
Successfully fetched the webpage from https://in

## For Tech Related News

In [37]:
# Ask the user for the page URLs; trim whitespace and drop blank entries so
# input such as "a, b," does not produce padded or empty URLs
urls_input = [
    url.strip()
    for url in input("Enter multiple URLs (comma-separated): ").split(',')
    if url.strip()
]

# Ask for the category (e.g., Sports, India, etc.); trimmed so stray spaces
# do not create a distinct category value
category_input = input("Enter the category for the news (e.g., Sports, India, etc.): ").strip()

# Scrape the technology pages using the <h3>-based variant
tech = fetch_news_and_category_h3(urls_input, category_input)


Enter multiple URLs (comma-separated):  https://indianexpress.com/section/technology/,https://indianexpress.com/section/technology/page/2/,https://indianexpress.com/section/technology/page/3/,https://indianexpress.com/section/technology/page/4/,https://indianexpress.com/section/technology/page/5/,https://indianexpress.com/section/technology/page/6/,https://indianexpress.com/section/technology/page/7/,https://indianexpress.com/section/technology/page/8/,https://indianexpress.com/section/technology/page/9/,https://indianexpress.com/section/technology/page/10/,https://indianexpress.com/section/technology/page/11/,https://indianexpress.com/section/technology/page/12/,https://indianexpress.com/section/technology/page/13/,https://indianexpress.com/section/technology/page/14/,https://indianexpress.com/section/technology/page/15/,https://indianexpress.com/section/technology/page/16/,https://indianexpress.com/section/technology/page/17/,https://indianexpress.com/section/technology/page/18/,http

Successfully fetched the webpage from https://indianexpress.com/section/technology/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/2/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/3/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/4/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/5/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/6/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/7/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/8/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/9/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/page/10/!
Successfully fetched the webpage from https://indianexpress.com/section/technology/pag

In [38]:
tech.head()

Unnamed: 0,News_Headline,Category
0,Sony drops free Gran Turismo game as PlayStati...,tech
1,OpenAI Sora: 9 videos that showcase versatilit...,tech
2,Apple iPhone SE 4: New rumour yet again hints ...,tech
3,Amazon Auto launches in the US; Hyundai cars n...,tech
4,"OpenAI unveils Canvas, a tool for collaborativ...",tech


In [41]:
# Per-section headline frames, listed in the order their cells appear above
# (trailing comments give the site section fetched in the recorded run)
dataframe_list = [
    news_df,        # india
    sports,         # political-pulse (despite the variable name)
    Entertainment,  # entertainment
    Business,       # business
    sports_1,       # sports
    city,           # cities
    lifestyle,      # lifestyle
    news_df_h3,     # world
    tech,           # technology
]

def append_dataframes(dataframe_list):
    """
    Stack several DataFrames vertically into one.

    Parameters:
    dataframe_list (list): DataFrames to combine, in the desired row order.

    Returns:
    pd.DataFrame: The combined rows with a fresh 0..n-1 index, or None
    (after printing a notice) when the list is empty.
    """
    if dataframe_list:
        # ignore_index=True discards each frame's own index and renumbers the rows
        return pd.concat(dataframe_list, ignore_index=True)

    print("The list of dataframes is empty.")
    return None

# Combine every per-section frame in `dataframe_list` into the final dataset
final_dataframe = append_dataframes(dataframe_list)

# Show a plain-text preview of the combined frame
print(final_dataframe)


                                          News_Headline Category
0     Modi government believes in democratising tech...    India
1     Parliament Winter Session LIVE Updates: Uproar...    India
2     Remove Nishikant Dubey’s defamatory, unparliam...    India
3     ‘Happy I’m an Indian now’: Pakistan-born Chris...    India
4     ‘Unable to pay school fees, scooter EMIs’: Odi...    India
...                                                 ...      ...
5820  Elon Musk claims US government made SpaceX put...     tech
5821  WhatsApp now lets you store contacts on the ap...     tech
5822  Indian cybersecurity execs call for stronger g...     tech
5823  Google Gemini may soon let you make calls and ...     tech
5824  Qualcomm says Snapdragon 8 Elite will support ...     tech

[5825 rows x 2 columns]


In [42]:
final_dataframe.head()

Unnamed: 0,News_Headline,Category
0,Modi government believes in democratising tech...,India
1,Parliament Winter Session LIVE Updates: Uproar...,India
2,"Remove Nishikant Dubey’s defamatory, unparliam...",India
3,‘Happy I’m an Indian now’: Pakistan-born Chris...,India
4,"‘Unable to pay school fees, scooter EMIs’: Odi...",India


In [43]:
final_dataframe.shape

(5825, 2)

In [44]:
# Save the combined headlines to 'final_dataframe.csv' (path is relative to the
# current working directory); index=False leaves the row index out of the file
final_dataframe.to_csv('final_dataframe.csv', index=False)