In [27]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [28]:
# Specify the URL of the BBC News World page
url = "https://www.bbc.com/news/world"

In [29]:
# Fetch the content from the URL
response = requests.get(url)
print(response.text)

<!DOCTYPE html><html lang="en-GB"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>World | Latest News &amp; Updates | BBC News</title><meta property="og:title" content="World | Latest News &amp; Updates | BBC News"/><meta name="twitter:title" content="World | Latest News &amp; Updates | BBC News"/><meta name="description" content="Get all the latest news, live updates and content about the World from across the BBC."/><meta property="og:description" content="Get all the latest news, live updates and content about the World from across the BBC."/><meta name="twitter:description" content="Get all the latest news, live updates and content about the World from across the BBC."/><meta name="msapplication-TileColor" content="#da532c"/><meta name="theme-color" content="#ffffff"/><meta name="robots" content="NOODP, NOYDIR"/><link rel="apple-touch-icon" sizes="180x180" href="/bbcx/apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href

In [30]:
# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")

In [31]:
# Find all article headlines
articles = soup.find_all('h2', {'data-testid': 'card-headline'})

In [32]:
print(articles)



In [33]:
# Initialize a list to hold news items
news_items = []

In [34]:
# Loop through each article and extract relevant details
for article in articles:
    news_i = {}
    news_i['headline'] = article.get_text(strip=True)
    description = article.find_next('p', {'data-testid': 'card-description'})
    news_i['paragraph'] = description.get_text(strip=True) if description else "No Description"
    category = article.find_next('span', {'data-testid': 'card-metadata-tag'})
    news_i['category'] = category.get_text(strip=True) if category else "No Category"
    time_info = article.find_next('span', {'data-testid': 'card-metadata-lastupdated'})
    news_i['time'] = time_info.get_text(strip=True) if time_info else "No Time Info"

    news_items.append(news_i)

In [35]:
for article in articles:
    print(article.text)

Japanese atomic bomb survivors win Nobel Peace Prize
'Russians invaded my house and held a soldier captive there'
One dead and 23 rescued after lift malfunction at Colorado gold mine
Australian woman's arm bitten off in dog attack
Japanese atomic bomb survivors win Nobel Peace Prize
Millions without power and thousands rescued from floods after Hurricane Milton
Drone footage shows Hurricane Milton damage in Florida
'Russians invaded my house and held a soldier captive there'
Elon Musk unveils Cybercab at Tesla robotaxi event
Obama urges vote for Harris in 'tight' presidential race
Diddy's trial to start in May 2025
China detains iPhone maker workers in 'strange' case
Student finds scorpion crawling inside Shein parcel 
Watch: Damage assessed after hurricane smashes through Florida
Florida webcams show Hurricane Milton making landfall
Watch: Moment extreme turbulence hits Hurricane Milton hunters
Las Vegas casino imploded to make way for stadium
How do young Palestinians and Israelis se

In [36]:
# Convert the list of dictionaries into a DataFrame
df = pd.DataFrame(news_items, columns=['headline', 'paragraph', 'category', 'time'])

In [37]:
# Display the first few entries of the DataFrame
print(df.head())

                                            headline  \
0  Growing fear in Beirut after Israel strikes ci...   
1  Japanese atomic bomb survivors win Nobel Peace...   
2  'Russians invaded my house and held a soldier ...   
3  One dead and 23 rescued after lift malfunction...   
4    Australian woman's arm bitten off in dog attack   

                                           paragraph     category       time  
0  Lebanon says 117 people have been injured in t...       Europe   Just now  
1  Japanese anti-nuclear group wins Nobel Peace p...       Europe   Just now  
2  After Marina Perederii fled her home in Ukrain...       Europe  7 hrs ago  
3  Two groups of tourists were affected by an ele...  US & Canada  2 hrs ago  
4  The woman was rushed to hospital with "life-th...    Australia  5 hrs ago  


In [38]:
# Generate the CSV file from the DataFrame
df.to_csv('Web_ScrapBBC_World.csv', index=False, encoding='utf-8')

print("CSV file 'Web_ScrapBBC_World.csv' has been successfully created.")

CSV file 'Web_ScrapBBC_World.csv' has been successfully created.
