In [4]:
from bs4 import BeautifulSoup
import requests
import pandas as pd


url = "https://www.bbc.com/news/world"
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

headlines = [headline.text.strip() for headline in soup.find_all('h2', {'data-testid': 'card-headline'})]
categories = [category.text.strip() for category in soup.find_all('span', {'data-testid': 'card-metadata-tag'})]
dates_raw = [date.text.strip() for date in soup.find_all('span', {'data-testid': 'card-metadata-lastupdated'})]

dates = [convert_to_datetime(date) for date in dates_raw]


In [5]:
from datetime import datetime, timedelta

def convert_to_datetime(relative_time):
    if 'mins' in relative_time:
        mins = int(relative_time.split()[0])
        date_time = datetime.now() - timedelta(minutes=mins)
    elif 'hrs' in relative_time:
        hours = int(relative_time.split()[0])
        date_time = datetime.now() - timedelta(hours=hours)
    elif 'days' in relative_time:
        days = int(relative_time.split()[0])
        date_time = datetime.now() - timedelta(days=days)
    else:
        # If the date is already in a proper format, just return it
        try:
            date_time = datetime.strptime(relative_time, "%Y-%m-%d")
        except ValueError:
            return None
    return date_time.strftime("%Y/%m/%d %H:%M")

In [7]:

data = []
for i in range(len(headlines)):
    data.append({
        'Headline': headlines[i] if i < len(headlines) else 'N/A',
        'Category': categories[i] if i < len(categories) else 'N/A',
        'Date and Time': dates[i] if i < len(dates) and dates[i] else 'N/A'
    })

df = pd.DataFrame(data)

df.to_csv('bbctime.csv', index=False)

print(df.head())


                                            Headline     Category  \
0  Four killed by tornadoes in Florida, as DeSant...  Middle East   
1  Moment police evacuate family after tree strik...       Europe   
2  Israeli strike on Gaza school sheltering displ...       Tennis   
3    Portugal tries to tempt under-35s with tax cuts  US & Canada   
4          Tennis legend Nadal to retire in November  Middle East   

      Date and Time  
0  2024/10/10 21:12  
1  2024/10/10 17:08  
2  2024/10/10 20:08  
3  2024/10/10 21:20  
4  2024/10/10 21:12  
