# Yahoo! News Scraper

# In [4]:
import csv
from time import sleep
from random import random
import requests
from bs4 import BeautifulSoup

def get_url(search):
    """Generate a Yahoo! News search url for the given search term.

    The term is URL-encoded before being placed in the ``p`` query
    parameter, so terms containing spaces or reserved characters
    (``&``, ``#``, ``=``, non-ASCII text, ...) produce a valid url.

    Args:
        search: The search term; converted to ``str`` before encoding.

    Returns:
        The full search url as a string.
    """
    # Local import keeps this function self-contained within the notebook cell.
    from urllib.parse import quote_plus

    template = 'https://news.search.yahoo.com/search?p={}'
    return template.format(quote_plus(str(search)))

def get_article(card):
    """Extract article information from the raw html of one result card.

    Each field is looked up by tag name and CSS class. A field whose
    element is absent from the card yields an empty string (``None`` for
    the link) instead of raising, so a single incomplete card does not
    abort the whole scrape.

    Args:
        card: A parsed element exposing ``find(tag, css_class)``, as
            returned by BeautifulSoup for one article card.

    Returns:
        A tuple ``(headline, source, date_posted, description, link)``.
    """
    def text_of(tag, css_class):
        # Return the element's text, or '' when the element is missing.
        node = card.find(tag, css_class)
        return node.text if node is not None else ''

    # The title element carries both the headline text and the link,
    # so look it up once and reuse it.
    title = card.find('h4', 'title')
    headline = title.text.strip() if title is not None else ''
    anchor = title.a if title is not None else None
    link = anchor.get('href') if anchor is not None else None

    source = text_of('span', 's-source')
    # The time text is prefixed with a '·' separator; drop it.
    date_posted = text_of('span', 's-time').replace('·', '').strip()
    description = text_of('p', 's-desc').strip()

    return (headline, source, date_posted, description, link)

def main(search, filename='results.csv', timeout=10):
    """Run the main program routine.

    Fetches the Yahoo! News search results for ``search``, follows the
    "next" link from page to page, extracts every article card, and
    writes all collected rows to a CSV file.

    Pagination stops when a page has no "next" link, when the next url
    has already been fetched, or when a request fails; in every case the
    articles collected so far are still written out.

    Args:
        search: The search term passed to ``get_url``.
        filename: Path of the CSV file to write (overwritten if present).
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    url = get_url(search)
    articles = []
    visited = set()

    # `visited` guards against a "next" link that points back to a page
    # already fetched, which would otherwise loop forever.
    while url and url not in visited:
        visited.add(url)

        # add random delay to prevent getting blocked from server
        sleep(random() * 2)

        try:
            # Without a timeout, requests can block indefinitely.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as err:
            # Stop paginating but keep what was gathered so far.
            print('Stopping at {}: {}'.format(url, err))
            break

        soup = BeautifulSoup(response.text, 'html.parser')

        # extract articles from page
        for card in soup.find_all('div', 'NewsArticle'):
            articles.append(get_article(card))

        # find the next page; no link (or no href) ends the loop
        next_link = soup.find('a', 'next')
        url = next_link.get('href') if next_link is not None else None

    # save article data
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Headline', 'Source', 'DatePosted', 'Description', 'Link'])
        writer.writerows(articles)

# In [5]:
# Run the scraper for the search term 'iphone'; rows are written to results.csv.
main('iphone')