# In [1]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import csv
import datetime
class VergeScraper:
    """Scrape article metadata from a web page and persist it to CSV and SQLite.

    Scraped articles are accumulated in ``self.articles`` as dicts with the
    keys ``url``, ``headline``, ``author`` and ``date``.  The instance owns an
    open SQLite connection (``self.conn``); call :meth:`close` or use the
    instance as a context manager to release it.
    """

    def __init__(self, url="https://www.theverge.com/", db_path='verge.db'):
        """Open the database and make sure the ``articles`` table exists.

        Args:
            url: Page to scrape.
            db_path: SQLite database file (``':memory:'`` is accepted).
        """
        self.url = url
        self.articles = []
        self.conn = sqlite3.connect(db_path)
        self.create_table()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never swallow exceptions raised inside the with-body

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()

    def create_table(self):
        """Create the ``articles`` table if it does not already exist."""
        # The connection context manager commits on success and rolls back
        # if the statement raises.
        with self.conn:
            self.conn.execute('''CREATE TABLE IF NOT EXISTS articles
                                 (id INTEGER PRIMARY KEY,
                                  url TEXT,
                                  headline TEXT,
                                  author TEXT,
                                  date TEXT)''')

    def scrape(self, timeout=10):
        """Fetch ``self.url`` and append every parsable article to ``self.articles``.

        An ``<article>`` element without an ``<h2>`` headline or a link with
        an ``href`` is skipped; a missing byline or timestamp is stored as an
        empty string instead of aborting the whole run.

        Args:
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-success HTTP status.
        """
        # Without a timeout requests may block indefinitely on a stalled server.
        page = requests.get(self.url, timeout=timeout)
        # Do not parse an error page as if it were the article listing.
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')

        for article in soup.find_all('article'):
            heading = article.find('h2')
            link = article.find('a')
            href = link.get('href') if link is not None else None
            if heading is None or not href:
                continue

            byline = article.find('span', class_='c-byline__item')
            stamp = article.find('time')
            author = byline.get_text().strip() if byline is not None else ''
            # Keep only the leading YYYY-MM-DD part of the datetime attribute.
            date = (stamp.get('datetime') or '')[:10] if stamp is not None else ''

            self.articles.append({'url': href,
                                  'headline': heading.get_text().strip(),
                                  'author': author,
                                  'date': date})

    def save_to_csv(self):
        """Write ``self.articles`` to ``<ddmmyyyy>_verge.csv`` in the working directory.

        The ``id`` column is the zero-based position of the article in
        ``self.articles``.
        """
        date_string = datetime.datetime.now().strftime('%d%m%Y')
        filename = f"{date_string}_verge.csv"
        fieldnames = ['id', 'url', 'headline', 'author', 'date']
        # Explicit UTF-8 so non-ASCII headlines do not depend on the platform
        # default encoding.
        with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(
                {'id': idx,
                 'url': article['url'],
                 'headline': article['headline'],
                 'author': article['author'],
                 'date': article['date']}
                for idx, article in enumerate(self.articles)
            )

    def save_to_db(self):
        """Insert ``self.articles`` into the ``articles`` table, skipping known URLs.

        Row ids are assigned by SQLite.  Using the per-run list index as the
        primary key made every run after the first collide on ids 0..n-1, so
        new articles were silently ignored; rows are de-duplicated by URL
        instead.
        """
        rows = [(article['url'], article['headline'], article['author'],
                 article['date'], article['url'])
                for article in self.articles]
        # One transaction for the whole batch: committed on success, rolled
        # back if any insert raises.
        with self.conn:
            self.conn.executemany(
                '''INSERT INTO articles (url, headline, author, date)
                   SELECT ?, ?, ?, ?
                   WHERE NOT EXISTS (SELECT 1 FROM articles WHERE url = ?)''',
                rows)

if __name__ == "__main__":
    # Script entry point: scrape the page once, then export the collected
    # articles to the CSV file and to the SQLite database, in that order.
    scraper = VergeScraper()
    scraper.scrape()
    for persist in (scraper.save_to_csv, scraper.save_to_db):
        persist()

