In [None]:
# Install required packages
!pip install requests beautifulsoup4 pandas tqdm

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm
import time
import random

In [None]:
def scrape_wired_article(url, timeout=30):
    """Download one article page and extract its title, body text, author and date.

    Args:
        url: Address of the article page to fetch.
        timeout: Value passed to ``requests.get`` as ``timeout`` (default 30).

    Returns:
        A dict with the keys ``'url'``, ``'title'``, ``'text'``, ``'author'``
        and ``'date'``. A field whose element (or, for the date, whose
        ``datetime`` attribute) is missing from the page holds a
        ``'... not found'`` placeholder string instead.
        ``None`` if the request fails, the server answers with an HTTP error
        status, or extraction raises; the error is printed in that case.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Without this check a 403/404/5xx error page would be parsed like an
        # article and returned as a record full of "not found" placeholders.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract title.
        # NOTE(review): per the BeautifulSoup docs, a multi-class string given
        # to class_ only matches a tag whose class attribute is exactly this
        # string - confirm against the current page markup.
        title_tag = soup.find('h1', class_='content-header__row content-header__hed')
        title = title_tag.text.strip() if title_tag is not None else 'Title not found'

        # Extract main content: the text of every <p> inside the body
        # container, joined with single spaces.
        body = soup.find('div', class_='body__inner-container')
        if body is not None:
            text = ' '.join(p.text for p in body.find_all('p'))
        else:
            text = 'Content not found'

        # Extract author
        author_tag = soup.find('a', class_='byline__name')
        author = author_tag.text.strip() if author_tag is not None else 'Author not found'

        # Extract date. A <time> tag without a datetime attribute falls back
        # to the placeholder instead of raising KeyError and losing the
        # whole article.
        date_tag = soup.find('time', class_='content-header__publish-date')
        if date_tag is not None:
            date = date_tag.get('datetime', 'Date not found')
        else:
            date = 'Date not found'

        return {
            'url': url,
            'title': title,
            'text': text,
            'author': author,
            'date': date
        }
    except Exception as e:
        # Best-effort by design: report the failure and let the caller skip
        # this URL rather than abort a whole batch.
        print(f"Error processing {url}: {str(e)}")
        return None

In [None]:
def scrape_multiple_articles(urls, delay_range=(1, 3)):
    """Scrape every URL in turn, pausing a random time between requests.

    Args:
        urls: Iterable of article URLs, each passed to ``scrape_wired_article``.
        delay_range: ``(low, high)`` bounds in seconds for the random pause
            taken between two consecutive requests (default ``(1, 3)``).

    Returns:
        A list with the result of ``scrape_wired_article`` for every URL that
        produced a truthy result, in input order; URLs whose scrape returned
        ``None`` are skipped.
    """
    low, high = delay_range
    results = []
    for index, url in enumerate(tqdm(urls, desc="Scraping articles")):
        # Pause before every request except the first, so there is a delay
        # between requests but no pointless wait after the final URL.
        if index:
            time.sleep(random.uniform(low, high))
        article_data = scrape_wired_article(url)
        if article_data:
            results.append(article_data)
    return results

In [None]:
# Article URLs to scrape. This hard-coded list is a placeholder: replace it
# with your actual method of loading URLs. Each entry is passed unchanged to
# the scraper by the cell that calls scrape_multiple_articles(urls).
urls = [
    "https://www.wired.com/story/applebot-extended-apple-ai-scraping/",
    "https://www.wired.com/story/undress-app-ai-harm-google-apple-login/",
    # Add more URLs here
]

In [None]:
# Scrape the articles. This performs one network request per URL and waits a
# random delay around requests, so the run time grows with len(urls).
# `articles` holds only the URLs that were scraped successfully.
articles = scrape_multiple_articles(urls)


In [None]:
# Convert to DataFrame. Naming the columns keeps the schema (and the CSV
# header) stable even when no article could be scraped and `articles` is empty.
df = pd.DataFrame(articles, columns=['url', 'title', 'text', 'author', 'date'])


In [None]:
# Preview the first rows only; the 'text' column holds full article bodies.
df.head()

In [None]:
# Save to CSV. The relative path resolves against the notebook's current
# working directory; index=False leaves the DataFrame's row index out of the
# file so only the scraped columns are written.
df.to_csv('wired_articles.csv', index=False)