# In [3]:
import requests
from bs4 import BeautifulSoup
import csv
import time

def scrape_sport_news(timeout=10):
    """Scrape the story promos listed on the BBC Sport front page.

    Fetches https://www.bbc.com/sport, looks for ``<li>`` elements whose
    ``class`` attribute exactly matches one of a fixed set of values, and
    extracts the sport label, headline, summary text, link and image URL
    from each one.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the caller forever.

    Returns:
        A list of dicts with the keys ``'Serial Number'``, ``'Sports'``,
        ``'Headline'``, ``'Content'``, ``'Link'`` and ``'Image URL'``.
        Fields that cannot be found are set to ``"N/A"``. An empty list is
        returned when the page cannot be retrieved (network error or a
        non-200 status); a message is printed in that case.
    """
    # Local import so this function stays self-contained.
    from urllib.parse import urljoin

    url = 'https://www.bbc.com/sport'

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status code so a
        # transient outage does not crash a long-running polling loop.
        print(f"Failed to retrieve the page: {exc}")
        return []

    # Check if the page was retrieved successfully
    if response.status_code != 200:
        print(f"Failed to retrieve the page, status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): these look like build-generated class names, so they
    # will probably need refreshing whenever the site markup changes --
    # confirm against the live page if the scraper starts returning nothing.
    li_classes = [
        'ssrcss-rs7w2i-ListItem e1gp961v0',
        'ssrcss-1dr5icq-ListItem e1gp961v0',
        'ssrcss-1r7nvnf-ListItem e1gp961v0',
        'ssrcss-13h7haz-ListItem e1gp961v0',
        'ssrcss-1amy2cn-ListItem e1gp961v0',
        'ssrcss-vmhflp-ListItem e1gp961v0',
        'ssrcss-8k5g2t-ListItem e1gp961v0',
        'ssrcss-2al3ka-ListItem e1gp961v0',
        'ssrcss-8zf5rb-ListItem e1gp961v0',
        'ssrcss-kftbws-ListItem e1gp961v0',
        'ssrcss-1t3edyj-ListItem e1gp961v0',
        'ssrcss-1wfzqxg-ListItem e1gp961v0',
    ]

    base_url = 'https://www.bbc.com'

    news_data = []

    for li_class in li_classes:
        for sport in soup.find_all('li', class_=li_class):
            sport_tag = sport.find('span', class_='ssrcss-1if1g9v-MetadataText e4wm5bw1')
            news_sports = sport_tag.text if sport_tag is not None else "N/A"

            headline_tag = sport.find('p', class_='ssrcss-6arcww-PromoHeadline exn3ah96')
            if headline_tag is not None and headline_tag.span is not None:
                news_headline = headline_tag.span.text
            else:
                news_headline = "N/A"

            content_container = sport.find('p', class_='ssrcss-1q0x1qg-Paragraph e1jhz7w10')
            news_content = content_container.text if content_container is not None else "N/A"

            # Use .get() so an anchor without an href yields "N/A" instead
            # of raising KeyError; urljoin leaves absolute hrefs untouched
            # and resolves relative ones against the site root.
            link_container = sport.find('div', class_='ssrcss-1f3bvyz-Stack e1y4nx260')
            anchor = link_container.a if link_container is not None else None
            href = anchor.get('href') if anchor is not None else None
            news_link = urljoin(base_url, href) if href else "N/A"

            img_container = sport.find('span', class_='ssrcss-11kpz0x-Placeholder etlorgc0')
            img_tag = img_container.find('img') if img_container is not None else None
            src = img_tag.get('src') if img_tag is not None else None
            image_url = src if src else "N/A"

            news_data.append({
                # Serial numbers run 1..N across all matched list items.
                'Serial Number': len(news_data) + 1,
                'Sports': news_sports,
                'Headline': news_headline,
                'Content': news_content,
                'Link': news_link,
                'Image URL': image_url,
            })

    return news_data

def write_to_csv(news_data):
    """Write the scraped rows to ``bbcsports_news.csv`` in the working directory.

    The file is overwritten on every call. A header row is written first,
    followed by one row per dict in ``news_data``.

    Args:
        news_data: Iterable of dicts keyed by the column names below.
    """
    columns = ['Serial Number', 'Sports', 'Headline', 'Content', 'Link', 'Image URL']

    # newline='' lets the csv module control line endings itself.
    with open('bbcsports_news.csv', 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=columns)
        writer.writeheader()
        for row in news_data:
            writer.writerow(row)

if __name__ == '__main__':
    # Poll the page forever: scrape, save any results, then pause.
    # The 10-minute gap between requests avoids overloading the server.
    time_wait = 10
    pause_seconds = time_wait * 60

    while True:
        latest_rows = scrape_sport_news()
        # Skip the write when nothing was scraped so the previous CSV
        # is not replaced by a header-only file.
        if latest_rows:
            write_to_csv(latest_rows)

        print(f'Waiting {time_wait} minutes...')
        time.sleep(pause_seconds)

# Output:
# Waiting 10 minutes...
# Waiting 10 minutes...
#
# KeyboardInterrupt: