In [9]:
from datetime import datetime
from urllib.parse import urljoin, urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [19]:
# News-site landing pages to download and scan for <article> elements.
urls = [
    "https://www.wsj.com/",
    "https://www.cnn.com/",
    "https://www.nytimes.com/",
    "https://www.theguardian.com/international",
    "https://www.reuters.com/news/world",
]

In [20]:
# Accumulator for the <article> tags gathered from every downloaded page.
articles = list()

In [21]:
# Download every landing page and collect its <article> elements.
# Start from an empty list so re-running this cell does not append duplicates.
articles = []
for url in urls:
    # A timeout (seconds) keeps one unresponsive site from hanging the notebook.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    page_articles = soup.find_all('article')
    # Resolve relative links against the page they came from *now*, while the
    # source URL is still known: only the bare tags are kept in `articles`, so
    # the origin of each tag cannot be recovered afterwards.
    for page_article in page_articles:
        for link in page_article.find_all('a', href=True):
            link['href'] = urljoin(url, link['href'])
    articles += page_articles

In [22]:
# Build one record per <article>: headline, timestamp, byline, body text, link.
data = []
for article in articles:
    # Teaser blocks without a usable link cannot be followed; skip them rather
    # than crash on a missing <a> tag or a missing href attribute.
    link = article.find('a', href=True)
    if link is None:
        continue
    # urljoin leaves absolute URLs untouched and resolves relative ones.
    # NOTE(review): `url` is the variable left behind by the fetch loop, i.e. the
    # last entry of `urls`, so a still-relative href is resolved against that
    # site no matter which page the article came from.
    article_url = urljoin(url, link['href'])

    heading = article.find('h3')
    title = heading.text.strip() if heading else None

    # The `datetime` attribute is optional on <time>; accept UTC ISO stamps
    # with or without fractional seconds and leave the date as None otherwise.
    date = None
    time_tag = article.find('time')
    raw_stamp = time_tag.get('datetime') if time_tag is not None else None
    if raw_stamp:
        for stamp_format in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
            try:
                parsed = datetime.strptime(raw_stamp, stamp_format)
            except ValueError:
                continue
            date = parsed.strftime('%Y-%m-%d %H:%M:%S')
            break

    byline = article.find('span', class_='c-byline__name')
    author = byline.text.strip() if byline else None

    # Fetch the article page itself; a single unreachable page should not
    # throw away every record collected so far, so fall back to empty text.
    try:
        content_response = requests.get(article_url, timeout=30)
    except requests.RequestException:
        content = ''
    else:
        content_soup = BeautifulSoup(content_response.content, 'html.parser')
        # Paragraph texts are concatenated without a separator.
        content = ''.join(
            paragraph.text for paragraph in content_soup.find_all('p')
        )

    data.append({
        'title': title,
        'date': date,
        'author': author,
        'content': content,
        'url': article_url
    })

In [25]:
# Tabulate the scraped records and print the last five rows as plain text.
df = pd.DataFrame(data=data)
print(df.tail(n=5))

                                                title  date author   
19  With abortion in the balance, Wisconsin voters...  None   None  \
20  Chicago to elect new mayor as crime emerges as...  None   None   
21  Australia regulator questions benefits of ANZ ...  None   None   
22  SK Hynix raises $1.7bln in convertible bond as...  None   None   
23  Mincor Resources urges shareholders to back Wy...  None   None   

                                              content   
19  [1/9] Supporters of Wisconsin Supreme Court ca...  \
20  CHICAGO, April 4 (Reuters) - Crime is the cent...   
21  SYDNEY, April 4 (Reuters) - Australia's compet...   
22  SEOUL, April 4 (Reuters) - South Korea's SK Hy...   
23  April 4 (Reuters) - Australian nickel miner Mi...   

                                                  url  
19  https://www.reuters.com/article/usa-election-w...  
20  https://www.reuters.com/article/usa-election-c...  
21  https://www.reuters.com/article/suncorp-m-a-an...  
22  https://